1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method.h"
22 #include "base/bit_utils.h"
23 #include "base/bit_utils_iterator.h"
24 #include "class_table.h"
25 #include "code_generator_utils.h"
26 #include "compiled_method.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "gc/accounting/card_table.h"
30 #include "heap_poisoning.h"
31 #include "intrinsics.h"
32 #include "intrinsics_arm64.h"
33 #include "linker/arm64/relative_patcher_arm64.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "offsets.h"
39 #include "thread.h"
40 #include "utils/arm64/assembler_arm64.h"
41 #include "utils/assembler.h"
42 #include "utils/stack_checks.h"
43
44 using namespace vixl::aarch64; // NOLINT(build/namespaces)
45 using vixl::ExactAssemblyScope;
46 using vixl::CodeBufferCheckScope;
47 using vixl::EmissionCheckScope;
48
49 #ifdef __
50 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
51 #endif
52
53 namespace art {
54
55 template<class MirrorType>
56 class GcRoot;
57
58 namespace arm64 {
59
60 using helpers::ARM64EncodableConstantOrRegister;
61 using helpers::ArtVixlRegCodeCoherentForRegSet;
62 using helpers::CPURegisterFrom;
63 using helpers::DRegisterFrom;
64 using helpers::FPRegisterFrom;
65 using helpers::HeapOperand;
66 using helpers::HeapOperandFrom;
67 using helpers::InputCPURegisterAt;
68 using helpers::InputCPURegisterOrZeroRegAt;
69 using helpers::InputFPRegisterAt;
70 using helpers::InputOperandAt;
71 using helpers::InputRegisterAt;
72 using helpers::Int64ConstantFrom;
73 using helpers::IsConstantZeroBitPattern;
74 using helpers::LocationFrom;
75 using helpers::OperandFromMemOperand;
76 using helpers::OutputCPURegister;
77 using helpers::OutputFPRegister;
78 using helpers::OutputRegister;
79 using helpers::QRegisterFrom;
80 using helpers::RegisterFrom;
81 using helpers::StackOperandFrom;
82 using helpers::VIXLRegCodeFromART;
83 using helpers::WRegisterFrom;
84 using helpers::XRegisterFrom;
85
86 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
87 // jump table version generates 7 instructions and num_entries literals. The compare/jump
88 // sequence generates less code/data for a small num_entries.
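// For example, at the threshold of 7 entries the compare/jump sequence costs roughly
// 1.5 * 7 + 3 ~= 14 instructions (56 bytes), about the same space as the jump table's
// 7 instructions plus 7 int32_t literals; above the threshold the jump table is smaller.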
89 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
90
91 // Reference loads (except object array loads) use LDR Wt, [Xn, #offset], which can handle
92 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
93 // For the Baker read barrier implementation using link-time generated thunks we need to split
94 // the offset explicitly.
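// (The unsigned-offset form of LDR Wt encodes a 12-bit immediate scaled by the 4-byte access
// size, so it can address offsets from 0 up to 16380 bytes.)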
95 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
96
97 // Flags controlling the use of link-time generated thunks for Baker read barriers.
98 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
99 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
100 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
101
102 // Some instructions have special requirements for a temporary, for example
103 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
104 // temp that's not an R0 (to avoid an extra move) and Baker read barrier field
105 // loads with large offsets need a fixed register to limit the number of link-time
106 // thunks we generate. For these and similar cases, we want to reserve a specific
107 // register that's neither callee-save nor an argument register. We choose x15.
108 inline Location FixedTempLocation() {
109 return Location::RegisterLocation(x15.GetCode());
110 }
111
112 inline Condition ARM64Condition(IfCondition cond) {
113 switch (cond) {
114 case kCondEQ: return eq;
115 case kCondNE: return ne;
116 case kCondLT: return lt;
117 case kCondLE: return le;
118 case kCondGT: return gt;
119 case kCondGE: return ge;
120 case kCondB: return lo;
121 case kCondBE: return ls;
122 case kCondA: return hi;
123 case kCondAE: return hs;
124 }
125 LOG(FATAL) << "Unreachable";
126 UNREACHABLE();
127 }
128
129 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
130 // The ARM64 condition codes can express all the necessary branches, see the
131 // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
132 // There is no dex instruction or HIR that would need the missing conditions
133 // "equal or unordered" or "not equal".
134 switch (cond) {
135 case kCondEQ: return eq;
136 case kCondNE: return ne /* unordered */;
137 case kCondLT: return gt_bias ? cc : lt /* unordered */;
138 case kCondLE: return gt_bias ? ls : le /* unordered */;
139 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
140 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
141 default:
142 LOG(FATAL) << "UNREACHABLE";
143 UNREACHABLE();
144 }
145 }
146
147 Location ARM64ReturnLocation(DataType::Type return_type) {
148 // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
149 // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
150 // but we use the exact registers for clarity.
151 if (return_type == DataType::Type::kFloat32) {
152 return LocationFrom(s0);
153 } else if (return_type == DataType::Type::kFloat64) {
154 return LocationFrom(d0);
155 } else if (return_type == DataType::Type::kInt64) {
156 return LocationFrom(x0);
157 } else if (return_type == DataType::Type::kVoid) {
158 return Location::NoLocation();
159 } else {
160 return LocationFrom(w0);
161 }
162 }
163
164 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
165 return ARM64ReturnLocation(return_type);
166 }
167
168 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
169 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
170 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
171
172 // Calculate memory accessing operand for save/restore live registers.
173 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
174 LocationSummary* locations,
175 int64_t spill_offset,
176 bool is_save) {
177 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
178 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
179 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
180 codegen->GetNumberOfCoreRegisters(),
181 fp_spills,
182 codegen->GetNumberOfFloatingPointRegisters()));
183
184 CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
185 unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
186 CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
187
188 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
189 UseScratchRegisterScope temps(masm);
190
191 Register base = masm->StackPointer();
192 int64_t core_spill_size = core_list.GetTotalSizeInBytes();
193 int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
194 int64_t reg_size = kXRegSizeInBytes;
195 int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
196 uint32_t ls_access_size = WhichPowerOf2(reg_size);
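  // Note: STP/LDP of X registers encode a signed 7-bit immediate scaled by 8, covering offsets
  // from -512 to +504 bytes, so a large spill area can push the last pair out of range.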
197 if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
198 !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
199     // If the offset does not fit in the instruction's immediate field, use an alternate register
200     // to compute the base address (the base address for the floating-point register spills).
201 Register new_base = temps.AcquireSameSizeAs(base);
202 __ Add(new_base, base, Operand(spill_offset + core_spill_size));
203 base = new_base;
204 spill_offset = -core_spill_size;
205 int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
206 DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
207 DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
208 }
209
210 if (is_save) {
211 __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
212 __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
213 } else {
214 __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
215 __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
216 }
217 }
218
219 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
220 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
221 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
222 for (uint32_t i : LowToHighBits(core_spills)) {
223 // If the register holds an object, update the stack mask.
224 if (locations->RegisterContainsObject(i)) {
225 locations->SetStackBit(stack_offset / kVRegSize);
226 }
227 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
228 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
229 saved_core_stack_offsets_[i] = stack_offset;
230 stack_offset += kXRegSizeInBytes;
231 }
232
233 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
234 for (uint32_t i : LowToHighBits(fp_spills)) {
235 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
236 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
237 saved_fpu_stack_offsets_[i] = stack_offset;
238 stack_offset += kDRegSizeInBytes;
239 }
240
241 SaveRestoreLiveRegistersHelper(codegen,
242 locations,
243 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
244 }
245
246 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
247 SaveRestoreLiveRegistersHelper(codegen,
248 locations,
249 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
250 }
251
252 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
253 public:
254   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
255
256   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
257 LocationSummary* locations = instruction_->GetLocations();
258 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
259
260 __ Bind(GetEntryLabel());
261 if (instruction_->CanThrowIntoCatchBlock()) {
262 // Live registers will be restored in the catch block if caught.
263 SaveLiveRegisters(codegen, instruction_->GetLocations());
264 }
265 // We're moving two locations to locations that could overlap, so we need a parallel
266 // move resolver.
267 InvokeRuntimeCallingConvention calling_convention;
268 codegen->EmitParallelMoves(locations->InAt(0),
269 LocationFrom(calling_convention.GetRegisterAt(0)),
270 DataType::Type::kInt32,
271 locations->InAt(1),
272 LocationFrom(calling_convention.GetRegisterAt(1)),
273 DataType::Type::kInt32);
274 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
275 ? kQuickThrowStringBounds
276 : kQuickThrowArrayBounds;
277 arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
278 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
279 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
280 }
281
282   bool IsFatal() const OVERRIDE { return true; }
283
284   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
285
286 private:
287 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
288 };
289
290 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
291 public:
292   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
293
294   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
295 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
296 __ Bind(GetEntryLabel());
297 arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
298 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
299 }
300
301   bool IsFatal() const OVERRIDE { return true; }
302
303   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
304
305 private:
306 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
307 };
308
309 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
310 public:
311   LoadClassSlowPathARM64(HLoadClass* cls,
312 HInstruction* at,
313 uint32_t dex_pc,
314 bool do_clinit)
315 : SlowPathCodeARM64(at),
316 cls_(cls),
317 dex_pc_(dex_pc),
318 do_clinit_(do_clinit) {
319 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
320 }
321
322   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
323 LocationSummary* locations = instruction_->GetLocations();
324 Location out = locations->Out();
325 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
326
327 __ Bind(GetEntryLabel());
328 SaveLiveRegisters(codegen, locations);
329
330 InvokeRuntimeCallingConvention calling_convention;
331 dex::TypeIndex type_index = cls_->GetTypeIndex();
332 __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
333 QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
334 : kQuickInitializeType;
335 arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
336 if (do_clinit_) {
337 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
338 } else {
339 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
340 }
341
342 // Move the class to the desired location.
343 if (out.IsValid()) {
344 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
345 DataType::Type type = instruction_->GetType();
346 arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
347 }
348 RestoreLiveRegisters(codegen, locations);
349 __ B(GetExitLabel());
350 }
351
352   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
353
354 private:
355 // The class this slow path will load.
356 HLoadClass* const cls_;
357
358 // The dex PC of `at_`.
359 const uint32_t dex_pc_;
360
361 // Whether to initialize the class.
362 const bool do_clinit_;
363
364 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
365 };
366
367 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
368 public:
369   explicit LoadStringSlowPathARM64(HLoadString* instruction)
370 : SlowPathCodeARM64(instruction) {}
371
372   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
373 LocationSummary* locations = instruction_->GetLocations();
374 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
375 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
376
377 __ Bind(GetEntryLabel());
378 SaveLiveRegisters(codegen, locations);
379
380 InvokeRuntimeCallingConvention calling_convention;
381 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
382 __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
383 arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
384 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
385 DataType::Type type = instruction_->GetType();
386 arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
387
388 RestoreLiveRegisters(codegen, locations);
389
390 __ B(GetExitLabel());
391 }
392
393   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
394
395 private:
396 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
397 };
398
399 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
400 public:
401   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
402
403   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
404 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
405 __ Bind(GetEntryLabel());
406 if (instruction_->CanThrowIntoCatchBlock()) {
407 // Live registers will be restored in the catch block if caught.
408 SaveLiveRegisters(codegen, instruction_->GetLocations());
409 }
410 arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
411 instruction_,
412 instruction_->GetDexPc(),
413 this);
414 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
415 }
416
417   bool IsFatal() const OVERRIDE { return true; }
418
419   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
420
421 private:
422 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
423 };
424
425 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
426 public:
427   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
428 : SlowPathCodeARM64(instruction), successor_(successor) {}
429
430   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
431 LocationSummary* locations = instruction_->GetLocations();
432 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
433 __ Bind(GetEntryLabel());
434 SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
435 arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
436 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
437 RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
438 if (successor_ == nullptr) {
439 __ B(GetReturnLabel());
440 } else {
441 __ B(arm64_codegen->GetLabelOf(successor_));
442 }
443 }
444
445   vixl::aarch64::Label* GetReturnLabel() {
446 DCHECK(successor_ == nullptr);
447 return &return_label_;
448 }
449
450   HBasicBlock* GetSuccessor() const {
451 return successor_;
452 }
453
454   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
455
456 private:
457 // If not null, the block to branch to after the suspend check.
458 HBasicBlock* const successor_;
459
460 // If `successor_` is null, the label to branch to after the suspend check.
461 vixl::aarch64::Label return_label_;
462
463 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
464 };
465
466 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
467 public:
468   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
469 : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
470
471   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
472 LocationSummary* locations = instruction_->GetLocations();
473
474 DCHECK(instruction_->IsCheckCast()
475 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
476 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
477 uint32_t dex_pc = instruction_->GetDexPc();
478
479 __ Bind(GetEntryLabel());
480
481 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
482 SaveLiveRegisters(codegen, locations);
483 }
484
485 // We're moving two locations to locations that could overlap, so we need a parallel
486 // move resolver.
487 InvokeRuntimeCallingConvention calling_convention;
488 codegen->EmitParallelMoves(locations->InAt(0),
489 LocationFrom(calling_convention.GetRegisterAt(0)),
490 DataType::Type::kReference,
491 locations->InAt(1),
492 LocationFrom(calling_convention.GetRegisterAt(1)),
493 DataType::Type::kReference);
494 if (instruction_->IsInstanceOf()) {
495 arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
496 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
497 DataType::Type ret_type = instruction_->GetType();
498 Location ret_loc = calling_convention.GetReturnLocation(ret_type);
499 arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
500 } else {
501 DCHECK(instruction_->IsCheckCast());
502 arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
503 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
504 }
505
506 if (!is_fatal_) {
507 RestoreLiveRegisters(codegen, locations);
508 __ B(GetExitLabel());
509 }
510 }
511
512   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
513   bool IsFatal() const OVERRIDE { return is_fatal_; }
514
515 private:
516 const bool is_fatal_;
517
518 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
519 };
520
521 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
522 public:
523   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
524 : SlowPathCodeARM64(instruction) {}
525
526   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
527 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
528 __ Bind(GetEntryLabel());
529 LocationSummary* locations = instruction_->GetLocations();
530 SaveLiveRegisters(codegen, locations);
531 InvokeRuntimeCallingConvention calling_convention;
532 __ Mov(calling_convention.GetRegisterAt(0),
533 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
534 arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
535 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
536 }
537
538   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
539
540 private:
541 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
542 };
543
544 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
545 public:
546   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
547
548   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
549 LocationSummary* locations = instruction_->GetLocations();
550 __ Bind(GetEntryLabel());
551 SaveLiveRegisters(codegen, locations);
552
553 InvokeRuntimeCallingConvention calling_convention;
554 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
555 parallel_move.AddMove(
556 locations->InAt(0),
557 LocationFrom(calling_convention.GetRegisterAt(0)),
558 DataType::Type::kReference,
559 nullptr);
560 parallel_move.AddMove(
561 locations->InAt(1),
562 LocationFrom(calling_convention.GetRegisterAt(1)),
563 DataType::Type::kInt32,
564 nullptr);
565 parallel_move.AddMove(
566 locations->InAt(2),
567 LocationFrom(calling_convention.GetRegisterAt(2)),
568 DataType::Type::kReference,
569 nullptr);
570     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
571
572 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
573 arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
574 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
575 RestoreLiveRegisters(codegen, locations);
576 __ B(GetExitLabel());
577 }
578
579   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
580
581 private:
582 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
583 };
584
585 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
586 uint32_t num_entries = switch_instr_->GetNumEntries();
587 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
588
589 // We are about to use the assembler to place literals directly. Make sure we have enough
590 // underlying code buffer and we have generated the jump table with right size.
591 EmissionCheckScope scope(codegen->GetVIXLAssembler(),
592 num_entries * sizeof(int32_t),
593 CodeBufferCheckScope::kExactSize);
594
595 __ Bind(&table_start_);
596 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
597 for (uint32_t i = 0; i < num_entries; i++) {
598 vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
599 DCHECK(target_label->IsBound());
600 ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
601 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
602 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
603 Literal<int32_t> literal(jump_offset);
604 __ place(&literal);
605 }
606 }
607
608 // Abstract base class for read barrier slow paths marking a reference
609 // `ref`.
610 //
611 // Argument `entrypoint` must be a register location holding the read
612 // barrier marking runtime entry point to be invoked or an empty
613 // location; in the latter case, the read barrier marking runtime
614 // entry point will be loaded by the slow path code itself.
615 class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
616 protected:
617   ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
618 : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
619 DCHECK(kEmitCompilerReadBarrier);
620 }
621
622   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
623
624 // Generate assembly code calling the read barrier marking runtime
625 // entry point (ReadBarrierMarkRegX).
626   void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
627 // No need to save live registers; it's taken care of by the
628 // entrypoint. Also, there is no need to update the stack mask,
629 // as this runtime call will not trigger a garbage collection.
630 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
631 DCHECK_NE(ref_.reg(), LR);
632 DCHECK_NE(ref_.reg(), WSP);
633 DCHECK_NE(ref_.reg(), WZR);
634 // IP0 is used internally by the ReadBarrierMarkRegX entry point
635 // as a temporary, it cannot be the entry point's input/output.
636 DCHECK_NE(ref_.reg(), IP0);
637 DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
638 // "Compact" slow path, saving two moves.
639 //
640 // Instead of using the standard runtime calling convention (input
641 // and output in W0):
642 //
643 // W0 <- ref
644 // W0 <- ReadBarrierMark(W0)
645 // ref <- W0
646 //
647 // we just use rX (the register containing `ref`) as input and output
648 // of a dedicated entrypoint:
649 //
650 // rX <- ReadBarrierMarkRegX(rX)
651 //
652 if (entrypoint_.IsValid()) {
653 arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
654 __ Blr(XRegisterFrom(entrypoint_));
655 } else {
656 // Entrypoint is not already loaded, load from the thread.
657 int32_t entry_point_offset =
658 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
659 // This runtime call does not require a stack map.
660 arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
661 }
662 }
663
664 // The location (register) of the marked object reference.
665 const Location ref_;
666
667 // The location of the entrypoint if it is already loaded.
668 const Location entrypoint_;
669
670 private:
671 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
672 };
673
674 // Slow path marking an object reference `ref` during a read
675 // barrier. The field `obj.field` in the object `obj` holding this
676 // reference does not get updated by this slow path after marking.
677 //
678 // This means that after the execution of this slow path, `ref` will
679 // always be up-to-date, but `obj.field` may not; i.e., after the
680 // flip, `ref` will be a to-space reference, but `obj.field` will
681 // probably still be a from-space reference (unless it gets updated by
682 // another thread, or if another thread installed another object
683 // reference (different from `ref`) in `obj.field`).
684 //
685 // Argument `entrypoint` must be a register location holding the read
686 // barrier marking runtime entry point to be invoked or an empty
687 // location; in the latter case, the read barrier marking runtime
688 // entry point will be loaded by the slow path code itself.
689 class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
690 public:
691   ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
692 Location ref,
693 Location entrypoint = Location::NoLocation())
694 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
695 DCHECK(kEmitCompilerReadBarrier);
696 }
697
698   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
699
700   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
701 LocationSummary* locations = instruction_->GetLocations();
702 DCHECK(locations->CanCall());
703 DCHECK(ref_.IsRegister()) << ref_;
704 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
705 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
706 << "Unexpected instruction in read barrier marking slow path: "
707 << instruction_->DebugName();
708
709 __ Bind(GetEntryLabel());
710 GenerateReadBarrierMarkRuntimeCall(codegen);
711 __ B(GetExitLabel());
712 }
713
714 private:
715 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
716 };
717
718 // Slow path loading `obj`'s lock word, loading a reference from
719 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
720 // marking `ref` if `obj` is gray according to the lock word (Baker
721 // read barrier). The field `obj.field` in the object `obj` holding
722 // this reference does not get updated by this slow path after marking
723 // (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
724 // below for that).
725 //
726 // This means that after the execution of this slow path, `ref` will
727 // always be up-to-date, but `obj.field` may not; i.e., after the
728 // flip, `ref` will be a to-space reference, but `obj.field` will
729 // probably still be a from-space reference (unless it gets updated by
730 // another thread, or if another thread installed another object
731 // reference (different from `ref`) in `obj.field`).
732 //
733 // Argument `entrypoint` must be a register location holding the read
734 // barrier marking runtime entry point to be invoked or an empty
735 // location; in the latter case, the read barrier marking runtime
736 // entry point will be loaded by the slow path code itself.
737 class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
738 public:
739   LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
740 Location ref,
741 Register obj,
742 uint32_t offset,
743 Location index,
744 size_t scale_factor,
745 bool needs_null_check,
746 bool use_load_acquire,
747 Register temp,
748 Location entrypoint = Location::NoLocation())
749 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
750 obj_(obj),
751 offset_(offset),
752 index_(index),
753 scale_factor_(scale_factor),
754 needs_null_check_(needs_null_check),
755 use_load_acquire_(use_load_acquire),
756 temp_(temp) {
757 DCHECK(kEmitCompilerReadBarrier);
758 DCHECK(kUseBakerReadBarrier);
759 }
760
761   const char* GetDescription() const OVERRIDE {
762 return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
763 }
764
765   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
766 LocationSummary* locations = instruction_->GetLocations();
767 DCHECK(locations->CanCall());
768 DCHECK(ref_.IsRegister()) << ref_;
769 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
770 DCHECK(obj_.IsW());
771 DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
772 DCHECK(instruction_->IsInstanceFieldGet() ||
773 instruction_->IsStaticFieldGet() ||
774 instruction_->IsArrayGet() ||
775 instruction_->IsArraySet() ||
776 instruction_->IsInstanceOf() ||
777 instruction_->IsCheckCast() ||
778 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
779 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
780 << "Unexpected instruction in read barrier marking slow path: "
781 << instruction_->DebugName();
782 // The read barrier instrumentation of object ArrayGet
783 // instructions does not support the HIntermediateAddress
784 // instruction.
785 DCHECK(!(instruction_->IsArrayGet() &&
786 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
787
788 // Temporary register `temp_`, used to store the lock word, must
789 // not be IP0 nor IP1, as we may use them to emit the reference
790 // load (in the call to GenerateRawReferenceLoad below), and we
791 // need the lock word to still be in `temp_` after the reference
792 // load.
793 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
794 DCHECK_NE(LocationFrom(temp_).reg(), IP1);
795
796 __ Bind(GetEntryLabel());
797
798 // When using MaybeGenerateReadBarrierSlow, the read barrier call is
799 // inserted after the original load. However, in fast path based
800 // Baker's read barriers, we need to perform the load of
801 // mirror::Object::monitor_ *before* the original reference load.
802 // This load-load ordering is required by the read barrier.
803 // The slow path (for Baker's algorithm) should look like:
804 //
805 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
806 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
807 // HeapReference<mirror::Object> ref = *src; // Original reference load.
808 // bool is_gray = (rb_state == ReadBarrier::GrayState());
809 // if (is_gray) {
810 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
811 // }
812 //
813 // Note: the original implementation in ReadBarrier::Barrier is
814 // slightly more complex as it performs additional checks that we do
815 // not do here for performance reasons.
816
817 // /* int32_t */ monitor = obj->monitor_
818 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
819 __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
820 if (needs_null_check_) {
821 codegen->MaybeRecordImplicitNullCheck(instruction_);
822 }
823 // /* LockWord */ lock_word = LockWord(monitor)
824 static_assert(sizeof(LockWord) == sizeof(int32_t),
825 "art::LockWord and int32_t have different sizes.");
826
827 // Introduce a dependency on the lock_word including rb_state,
828 // to prevent load-load reordering, and without using
829 // a memory barrier (which would be more expensive).
830 // `obj` is unchanged by this operation, but its value now depends
831 // on `temp`.
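    // (The lock word was loaded into the 32-bit `temp_`, so the upper 32 bits of `temp_.X()` are
    // zero; shifting right by 32 therefore adds zero, keeping `obj`'s value unchanged while
    // creating an address dependency on the load.)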
832 __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
833
834 // The actual reference load.
835 // A possible implicit null check has already been handled above.
836 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
837 arm64_codegen->GenerateRawReferenceLoad(instruction_,
838 ref_,
839 obj_,
840 offset_,
841 index_,
842 scale_factor_,
843 /* needs_null_check */ false,
844 use_load_acquire_);
845
846 // Mark the object `ref` when `obj` is gray.
847 //
848 // if (rb_state == ReadBarrier::GrayState())
849 // ref = ReadBarrier::Mark(ref);
850 //
851 // Given the numeric representation, it's enough to check the low bit of the rb_state.
852 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
853 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
854 __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
855 GenerateReadBarrierMarkRuntimeCall(codegen);
856
857 __ B(GetExitLabel());
858 }
859
860 private:
861 // The register containing the object holding the marked object reference field.
862 Register obj_;
863 // The offset, index and scale factor to access the reference in `obj_`.
864 uint32_t offset_;
865 Location index_;
866 size_t scale_factor_;
867 // Is a null check required?
868 bool needs_null_check_;
869 // Should this reference load use Load-Acquire semantics?
870 bool use_load_acquire_;
871 // A temporary register used to hold the lock word of `obj_`.
872 Register temp_;
873
874 DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
875 };
876
877 // Slow path loading `obj`'s lock word, loading a reference from
878 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
879 // marking `ref` if `obj` is gray according to the lock word (Baker
880 // read barrier). If needed, this slow path also atomically updates
881 // the field `obj.field` in the object `obj` holding this reference
882 // after marking (contrary to
883 // LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
884 // tries to update `obj.field`).
885 //
886 // This means that after the execution of this slow path, both `ref`
887 // and `obj.field` will be up-to-date; i.e., after the flip, both will
888 // hold the same to-space reference (unless another thread installed
889 // another object reference (different from `ref`) in `obj.field`).
890 //
891 // Argument `entrypoint` must be a register location holding the read
892 // barrier marking runtime entry point to be invoked or an empty
893 // location; in the latter case, the read barrier marking runtime
894 // entry point will be loaded by the slow path code itself.
895 class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
896 : public ReadBarrierMarkSlowPathBaseARM64 {
897 public:
898   LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
899 HInstruction* instruction,
900 Location ref,
901 Register obj,
902 uint32_t offset,
903 Location index,
904 size_t scale_factor,
905 bool needs_null_check,
906 bool use_load_acquire,
907 Register temp,
908 Location entrypoint = Location::NoLocation())
909 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
910 obj_(obj),
911 offset_(offset),
912 index_(index),
913 scale_factor_(scale_factor),
914 needs_null_check_(needs_null_check),
915 use_load_acquire_(use_load_acquire),
916 temp_(temp) {
917 DCHECK(kEmitCompilerReadBarrier);
918 DCHECK(kUseBakerReadBarrier);
919 }
920
921   const char* GetDescription() const OVERRIDE {
922 return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
923 }
924
925   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
926 LocationSummary* locations = instruction_->GetLocations();
927 Register ref_reg = WRegisterFrom(ref_);
928 DCHECK(locations->CanCall());
929 DCHECK(ref_.IsRegister()) << ref_;
930 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
931 DCHECK(obj_.IsW());
932 DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
933
934 // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
935 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
936 << "Unexpected instruction in read barrier marking and field updating slow path: "
937 << instruction_->DebugName();
938 DCHECK(instruction_->GetLocations()->Intrinsified());
939 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
940 DCHECK_EQ(offset_, 0u);
941 DCHECK_EQ(scale_factor_, 0u);
942 DCHECK_EQ(use_load_acquire_, false);
943 // The location of the offset of the marked reference field within `obj_`.
944 Location field_offset = index_;
945 DCHECK(field_offset.IsRegister()) << field_offset;
946
947 // Temporary register `temp_`, used to store the lock word, must
948 // not be IP0 nor IP1, as we may use them to emit the reference
949 // load (in the call to GenerateRawReferenceLoad below), and we
950 // need the lock word to still be in `temp_` after the reference
951 // load.
952 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
953 DCHECK_NE(LocationFrom(temp_).reg(), IP1);
954
955 __ Bind(GetEntryLabel());
956
957 // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
958 //
959 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
960 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
961 // HeapReference<mirror::Object> ref = *src; // Original reference load.
962 // bool is_gray = (rb_state == ReadBarrier::GrayState());
963 // if (is_gray) {
964 // old_ref = ref;
965 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
966 // compareAndSwapObject(obj, field_offset, old_ref, ref);
967 // }
968
969 // /* int32_t */ monitor = obj->monitor_
970 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
971 __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
972 if (needs_null_check_) {
973 codegen->MaybeRecordImplicitNullCheck(instruction_);
974 }
975 // /* LockWord */ lock_word = LockWord(monitor)
976 static_assert(sizeof(LockWord) == sizeof(int32_t),
977 "art::LockWord and int32_t have different sizes.");
978
979 // Introduce a dependency on the lock_word including rb_state,
980 // to prevent load-load reordering, and without using
981 // a memory barrier (which would be more expensive).
982 // `obj` is unchanged by this operation, but its value now depends
983 // on `temp`.
984 __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
985
986 // The actual reference load.
987 // A possible implicit null check has already been handled above.
988 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
989 arm64_codegen->GenerateRawReferenceLoad(instruction_,
990 ref_,
991 obj_,
992 offset_,
993 index_,
994 scale_factor_,
995 /* needs_null_check */ false,
996 use_load_acquire_);
997
998 // Mark the object `ref` when `obj` is gray.
999 //
1000 // if (rb_state == ReadBarrier::GrayState())
1001 // ref = ReadBarrier::Mark(ref);
1002 //
1003 // Given the numeric representation, it's enough to check the low bit of the rb_state.
1004 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1005 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1006 __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
1007
1008 // Save the old value of the reference before marking it.
1009 // Note that we cannot use IP to save the old reference, as IP is
1010 // used internally by the ReadBarrierMarkRegX entry point, and we
1011 // need the old reference after the call to that entry point.
1012 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1013 __ Mov(temp_.W(), ref_reg);
1014
1015 GenerateReadBarrierMarkRuntimeCall(codegen);
1016
1017 // If the new reference is different from the old reference,
1018 // update the field in the holder (`*(obj_ + field_offset)`).
1019 //
1020 // Note that this field could also hold a different object, if
1021 // another thread had concurrently changed it. In that case, the
1022 // LDXR/CMP/BNE sequence of instructions in the compare-and-set
1023 // (CAS) operation below would abort the CAS, leaving the field
1024 // as-is.
1025 __ Cmp(temp_.W(), ref_reg);
1026 __ B(eq, GetExitLabel());
1027
1028     // Update the holder's field atomically. This may fail if the
1029     // mutator updates it before us, but that is OK. This is achieved
1030 // using a strong compare-and-set (CAS) operation with relaxed
1031 // memory synchronization ordering, where the expected value is
1032 // the old reference and the desired value is the new reference.
1033
1034 MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
1035 UseScratchRegisterScope temps(masm);
1036
1037 // Convenience aliases.
1038 Register base = obj_.W();
1039 Register offset = XRegisterFrom(field_offset);
1040 Register expected = temp_.W();
1041 Register value = ref_reg;
1042 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
1043 Register tmp_value = temps.AcquireW(); // Value in memory.
1044
1045 __ Add(tmp_ptr, base.X(), Operand(offset));
1046
1047 if (kPoisonHeapReferences) {
1048 arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
1049 if (value.Is(expected)) {
1050 // Do not poison `value`, as it is the same register as
1051 // `expected`, which has just been poisoned.
1052 } else {
1053 arm64_codegen->GetAssembler()->PoisonHeapReference(value);
1054 }
1055 }
1056
1057 // do {
1058 // tmp_value = [tmp_ptr] - expected;
1059 // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1060
1061 vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
1062 __ Bind(&loop_head);
1063 __ Ldxr(tmp_value, MemOperand(tmp_ptr));
1064 __ Cmp(tmp_value, expected);
1065 __ B(&comparison_failed, ne);
1066 __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
1067 __ Cbnz(tmp_value, &loop_head);
1068 __ B(&exit_loop);
1069 __ Bind(&comparison_failed);
1070 __ Clrex();
1071 __ Bind(&exit_loop);
1072
1073 if (kPoisonHeapReferences) {
1074 arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
1075 if (value.Is(expected)) {
1076 // Do not unpoison `value`, as it is the same register as
1077 // `expected`, which has just been unpoisoned.
1078 } else {
1079 arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
1080 }
1081 }
1082
1083 __ B(GetExitLabel());
1084 }
1085
1086 private:
1087 // The register containing the object holding the marked object reference field.
1088 const Register obj_;
1089 // The offset, index and scale factor to access the reference in `obj_`.
1090 uint32_t offset_;
1091 Location index_;
1092 size_t scale_factor_;
1093 // Is a null check required?
1094 bool needs_null_check_;
1095 // Should this reference load use Load-Acquire semantics?
1096 bool use_load_acquire_;
1097 // A temporary register used to hold the lock word of `obj_`; and
1098 // also to hold the original reference value, when the reference is
1099 // marked.
1100 const Register temp_;
1101
1102 DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
1103 };
1104
1105 // Slow path generating a read barrier for a heap reference.
1106 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
1107 public:
1108   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
1109 Location out,
1110 Location ref,
1111 Location obj,
1112 uint32_t offset,
1113 Location index)
1114 : SlowPathCodeARM64(instruction),
1115 out_(out),
1116 ref_(ref),
1117 obj_(obj),
1118 offset_(offset),
1119 index_(index) {
1120 DCHECK(kEmitCompilerReadBarrier);
1121 // If `obj` is equal to `out` or `ref`, it means the initial object
1122 // has been overwritten by (or after) the heap object reference load
1123 // to be instrumented, e.g.:
1124 //
1125   //   __ Ldr(out, HeapOperand(out, class_offset));
1126 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
1127 //
1128 // In that case, we have lost the information about the original
1129 // object, and the emitted read barrier cannot work properly.
1130 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
1131 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
1132 }
1133
1134   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1135 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1136 LocationSummary* locations = instruction_->GetLocations();
1137 DataType::Type type = DataType::Type::kReference;
1138 DCHECK(locations->CanCall());
1139 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1140 DCHECK(instruction_->IsInstanceFieldGet() ||
1141 instruction_->IsStaticFieldGet() ||
1142 instruction_->IsArrayGet() ||
1143 instruction_->IsInstanceOf() ||
1144 instruction_->IsCheckCast() ||
1145 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
1146 << "Unexpected instruction in read barrier for heap reference slow path: "
1147 << instruction_->DebugName();
1148 // The read barrier instrumentation of object ArrayGet
1149 // instructions does not support the HIntermediateAddress
1150 // instruction.
1151 DCHECK(!(instruction_->IsArrayGet() &&
1152 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
1153
1154 __ Bind(GetEntryLabel());
1155
1156 SaveLiveRegisters(codegen, locations);
1157
1158 // We may have to change the index's value, but as `index_` is a
1159 // constant member (like other "inputs" of this slow path),
1160 // introduce a copy of it, `index`.
1161 Location index = index_;
1162 if (index_.IsValid()) {
1163 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
1164 if (instruction_->IsArrayGet()) {
1165 // Compute the actual memory offset and store it in `index`.
1166 Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
1167 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
1168 if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
1169 // We are about to change the value of `index_reg` (see the
1170 // calls to vixl::MacroAssembler::Lsl and
1171 // vixl::MacroAssembler::Mov below), but it has
1172 // not been saved by the previous call to
1173 // art::SlowPathCode::SaveLiveRegisters, as it is a
1174 // callee-save register --
1175 // art::SlowPathCode::SaveLiveRegisters does not consider
1176 // callee-save registers, as it has been designed with the
1177 // assumption that callee-save registers are supposed to be
1178 // handled by the called function. So, as a callee-save
1179 // register, `index_reg` _would_ eventually be saved onto
1180 // the stack, but it would be too late: we would have
1181 // changed its value earlier. Therefore, we manually save
1182 // it here into another freely available register,
1183 // `free_reg`, chosen of course among the caller-save
1184 // registers (as a callee-save `free_reg` register would
1185 // exhibit the same problem).
1186 //
1187 // Note we could have requested a temporary register from
1188 // the register allocator instead; but we prefer not to, as
1189 // this is a slow path, and we know we can find a
1190 // caller-save register that is available.
1191 Register free_reg = FindAvailableCallerSaveRegister(codegen);
1192 __ Mov(free_reg.W(), index_reg);
1193 index_reg = free_reg;
1194 index = LocationFrom(index_reg);
1195 } else {
1196 // The initial register stored in `index_` has already been
1197 // saved in the call to art::SlowPathCode::SaveLiveRegisters
1198 // (as it is not a callee-save register), so we can freely
1199 // use it.
1200 }
1201 // Shifting the index value contained in `index_reg` by the scale
1202 // factor (2) cannot overflow in practice, as the runtime is
1203 // unable to allocate object arrays with a size larger than
1204 // 2^26 - 1 (that is, 2^28 - 4 bytes).
1205 __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
1206 static_assert(
1207 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
1208 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
1209 __ Add(index_reg, index_reg, Operand(offset_));
1210 } else {
1211 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
1212 // intrinsics, `index_` is not shifted by a scale factor of 2
1213 // (as in the case of ArrayGet), as it is actually an offset
1214 // to an object field within an object.
1215 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
1216 DCHECK(instruction_->GetLocations()->Intrinsified());
1217 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
1218 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
1219 << instruction_->AsInvoke()->GetIntrinsic();
1220 DCHECK_EQ(offset_, 0u);
1221 DCHECK(index_.IsRegister());
1222 }
1223 }
1224
1225 // We're moving two or three locations to locations that could
1226 // overlap, so we need a parallel move resolver.
1227 InvokeRuntimeCallingConvention calling_convention;
1228 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
1229 parallel_move.AddMove(ref_,
1230 LocationFrom(calling_convention.GetRegisterAt(0)),
1231 type,
1232 nullptr);
1233 parallel_move.AddMove(obj_,
1234 LocationFrom(calling_convention.GetRegisterAt(1)),
1235 type,
1236 nullptr);
1237 if (index.IsValid()) {
1238 parallel_move.AddMove(index,
1239 LocationFrom(calling_convention.GetRegisterAt(2)),
1240 DataType::Type::kInt32,
1241 nullptr);
1242       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1243 } else {
1244       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1245 arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
1246 }
1247 arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
1248 instruction_,
1249 instruction_->GetDexPc(),
1250 this);
1251 CheckEntrypointTypes<
1252 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
1253 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1254
1255 RestoreLiveRegisters(codegen, locations);
1256
1257 __ B(GetExitLabel());
1258 }
1259
1260   const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
1261
1262 private:
1263   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
1264 size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
1265 size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
1266 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
1267 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
1268 return Register(VIXLRegCodeFromART(i), kXRegSize);
1269 }
1270 }
1271 // We shall never fail to find a free caller-save register, as
1272 // there are more than two core caller-save registers on ARM64
1273 // (meaning it is possible to find one which is different from
1274 // `ref` and `obj`).
1275 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
1276 LOG(FATAL) << "Could not find a free register";
1277 UNREACHABLE();
1278 }
1279
1280 const Location out_;
1281 const Location ref_;
1282 const Location obj_;
1283 const uint32_t offset_;
1284 // An additional location containing an index to an array.
1285 // Only used for HArrayGet and the UnsafeGetObject &
1286 // UnsafeGetObjectVolatile intrinsics.
1287 const Location index_;
1288
1289 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
1290 };
1291
1292 // Slow path generating a read barrier for a GC root.
1293 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
1294 public:
1295 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
1296 : SlowPathCodeARM64(instruction), out_(out), root_(root) {
1297 DCHECK(kEmitCompilerReadBarrier);
1298 }
1299
1300 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1301 LocationSummary* locations = instruction_->GetLocations();
1302 DataType::Type type = DataType::Type::kReference;
1303 DCHECK(locations->CanCall());
1304 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1305 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
1306 << "Unexpected instruction in read barrier for GC root slow path: "
1307 << instruction_->DebugName();
1308
1309 __ Bind(GetEntryLabel());
1310 SaveLiveRegisters(codegen, locations);
1311
1312 InvokeRuntimeCallingConvention calling_convention;
1313 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1314 // The argument of the ReadBarrierForRootSlow is not a managed
1315 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
1316 // thus we need a 64-bit move here, and we cannot use
1317 //
1318 // arm64_codegen->MoveLocation(
1319 // LocationFrom(calling_convention.GetRegisterAt(0)),
1320 // root_,
1321 // type);
1322 //
1323 // which would emit a 32-bit move, as `type` is a (32-bit wide)
1324 // reference type (`DataType::Type::kReference`).
1325 __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
1326 arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1327 instruction_,
1328 instruction_->GetDexPc(),
1329 this);
1330 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1331 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1332
1333 RestoreLiveRegisters(codegen, locations);
1334 __ B(GetExitLabel());
1335 }
1336
1337 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1338
1339 private:
1340 const Location out_;
1341 const Location root_;
1342
1343 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1344 };
1345
1346 #undef __
1347
1348 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
1349 Location next_location;
1350 if (type == DataType::Type::kVoid) {
1351 LOG(FATAL) << "Unreachable type " << type;
1352 }
1353
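// Note: each argument class (FP vs. core) has its own register cursor; once the corresponding
// register file is exhausted, the remaining arguments of that class are passed on the stack.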
1354 if (DataType::IsFloatingPointType(type) &&
1355 (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1356 next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1357 } else if (!DataType::IsFloatingPointType(type) &&
1358 (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1359 next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1360 } else {
1361 size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1362 next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1363 : Location::StackSlot(stack_offset);
1364 }
1365
1366 // Space on the stack is reserved for all arguments.
1367 stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
1368 return next_location;
1369 }
1370
1371 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1372 return LocationFrom(kArtMethodRegister);
1373 }
1374
1375 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1376 const Arm64InstructionSetFeatures& isa_features,
1377 const CompilerOptions& compiler_options,
1378 OptimizingCompilerStats* stats)
1379 : CodeGenerator(graph,
1380 kNumberOfAllocatableRegisters,
1381 kNumberOfAllocatableFPRegisters,
1382 kNumberOfAllocatableRegisterPairs,
1383 callee_saved_core_registers.GetList(),
1384 callee_saved_fp_registers.GetList(),
1385 compiler_options,
1386 stats),
1387 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1388 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1389 location_builder_(graph, this),
1390 instruction_visitor_(graph, this),
1391 move_resolver_(graph->GetAllocator(), this),
1392 assembler_(graph->GetAllocator()),
1393 isa_features_(isa_features),
1394 uint32_literals_(std::less<uint32_t>(),
1395 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1396 uint64_literals_(std::less<uint64_t>(),
1397 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1398 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1399 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1400 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1401 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1402 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1403 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1404 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1405 jit_string_patches_(StringReferenceValueComparator(),
1406 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1407 jit_class_patches_(TypeReferenceValueComparator(),
1408 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1409 // Save the link register (containing the return address) to mimic Quick.
1410 AddAllocatedRegister(LocationFrom(lr));
1411 }
1412
1413 #define __ GetVIXLAssembler()->
1414
1415 void CodeGeneratorARM64::EmitJumpTables() {
1416 for (auto&& jump_table : jump_tables_) {
1417 jump_table->EmitTable(this);
1418 }
1419 }
1420
1421 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1422 EmitJumpTables();
1423 // Ensure we emit the literal pool.
1424 __ FinalizeCode();
1425
1426 CodeGenerator::Finalize(allocator);
1427 }
1428
1429 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1430 // Note: There are 6 kinds of moves:
1431 // 1. constant -> GPR/FPR (non-cycle)
1432 // 2. constant -> stack (non-cycle)
1433 // 3. GPR/FPR -> GPR/FPR
1434 // 4. GPR/FPR -> stack
1435 // 5. stack -> GPR/FPR
1436 // 6. stack -> stack (non-cycle)
1437 // Cases 1, 2, and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4,
1438 // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
1439 // intersecting cycles on ARM64, so we always have 1 GPR and 1 FPR available as VIXL temps to
1440 // resolve the dependency.
1441 vixl_temps_.Open(GetVIXLAssembler());
1442 }
1443
1444 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1445 vixl_temps_.Close();
1446 }
1447
1448 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1449 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1450 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1451 || kind == Location::kSIMDStackSlot);
1452 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1453 ? Location::kFpuRegister
1454 : Location::kRegister;
1455 Location scratch = GetScratchLocation(kind);
1456 if (!scratch.Equals(Location::NoLocation())) {
1457 return scratch;
1458 }
1459 // Allocate from VIXL temp registers.
1460 if (kind == Location::kRegister) {
1461 scratch = LocationFrom(vixl_temps_.AcquireX());
1462 } else {
1463 DCHECK_EQ(kind, Location::kFpuRegister);
1464 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1465 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1466 : vixl_temps_.AcquireD());
1467 }
1468 AddScratchLocation(scratch);
1469 return scratch;
1470 }
1471
1472 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1473 if (loc.IsRegister()) {
1474 vixl_temps_.Release(XRegisterFrom(loc));
1475 } else {
1476 DCHECK(loc.IsFpuRegister());
1477 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1478 }
1479 RemoveScratchLocation(loc);
1480 }
1481
1482 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1483 MoveOperands* move = moves_[index];
1484 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1485 }
1486
1487 void CodeGeneratorARM64::GenerateFrameEntry() {
1488 MacroAssembler* masm = GetVIXLAssembler();
1489 __ Bind(&frame_entry_label_);
1490
1491 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1492 UseScratchRegisterScope temps(masm);
1493 Register temp = temps.AcquireX();
1494 __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
1495 __ Add(temp, temp, 1);
1496 __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
1497 }
1498
1499 bool do_overflow_check =
1500 FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1501 if (do_overflow_check) {
1502 UseScratchRegisterScope temps(masm);
1503 Register temp = temps.AcquireX();
1504 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1505 __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
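// The load below probes the lowest address the frame may touch. If the stack would overflow,
// the load faults into the guard page and the implicit stack-overflow handler turns the fault
// into a StackOverflowError at the pc recorded just after it.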
1506 {
1507 // Ensure that between load and RecordPcInfo there are no pools emitted.
1508 ExactAssemblyScope eas(GetVIXLAssembler(),
1509 kInstructionSize,
1510 CodeBufferCheckScope::kExactSize);
1511 __ ldr(wzr, MemOperand(temp, 0));
1512 RecordPcInfo(nullptr, 0);
1513 }
1514 }
1515
1516 if (!HasEmptyFrame()) {
1517 int frame_size = GetFrameSize();
1518 // Stack layout:
1519 // sp[frame_size - 8] : lr.
1520 // ... : other preserved core registers.
1521 // ... : other preserved fp registers.
1522 // ... : reserved frame space.
1523 // sp[0] : current method.
1524
1525 // Save the current method if we need it. Note that we do not
1526 // do this in HCurrentMethod, as the instruction might have been removed
1527 // in the SSA graph.
1528 if (RequiresCurrentMethod()) {
1529 __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1530 } else {
1531 __ Claim(frame_size);
1532 }
1533 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1534 GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
1535 frame_size - GetCoreSpillSize());
1536 GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
1537 frame_size - FrameEntrySpillSize());
1538
1539 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1540 // Initialize should_deoptimize flag to 0.
1541 Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1542 __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1543 }
1544 }
1545
1546 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
1547 }
1548
1549 void CodeGeneratorARM64::GenerateFrameExit() {
1550 GetAssembler()->cfi().RememberState();
1551 if (!HasEmptyFrame()) {
1552 int frame_size = GetFrameSize();
1553 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1554 frame_size - FrameEntrySpillSize());
1555 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1556 frame_size - GetCoreSpillSize());
1557 __ Drop(frame_size);
1558 GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1559 }
1560 __ Ret();
1561 GetAssembler()->cfi().RestoreState();
1562 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1563 }
1564
1565 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1566 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1567 return CPURegList(CPURegister::kRegister, kXRegSize,
1568 core_spill_mask_);
1569 }
1570
1571 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1572 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1573 GetNumberOfFloatingPointRegisters()));
1574 return CPURegList(CPURegister::kFPRegister, kDRegSize,
1575 fpu_spill_mask_);
1576 }
1577
1578 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1579 __ Bind(GetLabelOf(block));
1580 }
1581
1582 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1583 DCHECK(location.IsRegister());
1584 __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1585 }
1586
1587 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1588 if (location.IsRegister()) {
1589 locations->AddTemp(location);
1590 } else {
1591 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1592 }
1593 }
1594
1595 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1596 UseScratchRegisterScope temps(GetVIXLAssembler());
1597 Register card = temps.AcquireX();
1598 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
1599 vixl::aarch64::Label done;
1600 if (value_can_be_null) {
1601 __ Cbz(value, &done);
1602 }
1603 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1604 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
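// The card table base register doubles as the value to store: the runtime biases the card
// table so that the base address's least significant byte equals the dirty-card value, which
// lets the Strb below mark the card without materializing a separate constant.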
1605 __ Strb(card, MemOperand(card, temp.X()));
1606 if (value_can_be_null) {
1607 __ Bind(&done);
1608 }
1609 }
1610
1611 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1612 // Blocked core registers:
1613 // lr : Runtime reserved.
1614 // tr : Runtime reserved.
1615 // mr : Runtime reserved.
1616 // ip1 : VIXL core temp.
1617 // ip0 : VIXL core temp.
1618 //
1619 // Blocked fp registers:
1620 // d31 : VIXL fp temp.
1621 CPURegList reserved_core_registers = vixl_reserved_core_registers;
1622 reserved_core_registers.Combine(runtime_reserved_core_registers);
1623 while (!reserved_core_registers.IsEmpty()) {
1624 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1625 }
1626
1627 CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1628 while (!reserved_fp_registers.IsEmpty()) {
1629 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1630 }
1631
1632 if (GetGraph()->IsDebuggable()) {
1633 // Stubs do not save callee-save floating point registers. If the graph
1634 // is debuggable, we need to deal with these registers differently. For
1635 // now, just block them.
1636 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1637 while (!reserved_fp_registers_debuggable.IsEmpty()) {
1638 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1639 }
1640 }
1641 }
1642
1643 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1644 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1645 __ Str(reg, MemOperand(sp, stack_index));
1646 return kArm64WordSize;
1647 }
1648
1649 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1650 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1651 __ Ldr(reg, MemOperand(sp, stack_index));
1652 return kArm64WordSize;
1653 }
1654
1655 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1656 FPRegister reg = FPRegister(reg_id, kDRegSize);
1657 __ Str(reg, MemOperand(sp, stack_index));
1658 return kArm64WordSize;
1659 }
1660
1661 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1662 FPRegister reg = FPRegister(reg_id, kDRegSize);
1663 __ Ldr(reg, MemOperand(sp, stack_index));
1664 return kArm64WordSize;
1665 }
1666
1667 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1668 stream << XRegister(reg);
1669 }
1670
1671 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1672 stream << DRegister(reg);
1673 }
1674
1675 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1676 if (constant->IsIntConstant()) {
1677 __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1678 } else if (constant->IsLongConstant()) {
1679 __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1680 } else if (constant->IsNullConstant()) {
1681 __ Mov(Register(destination), 0);
1682 } else if (constant->IsFloatConstant()) {
1683 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1684 } else {
1685 DCHECK(constant->IsDoubleConstant());
1686 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1687 }
1688 }
1689
1690
1691 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1692 DCHECK(constant.IsConstant());
1693 HConstant* cst = constant.GetConstant();
1694 return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1695 // Null is mapped to a core W register, which we associate with kPrimInt.
1696 (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1697 (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1698 (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1699 (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1700 }
1701
1702 // Allocate a scratch register from the VIXL pool, querying first
1703 // the floating-point register pool, and then the core register
1704 // pool. This is essentially a reimplementation of
1705 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1706 // using a different allocation strategy.
1707 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1708 vixl::aarch64::UseScratchRegisterScope* temps,
1709 int size_in_bits) {
1710 return masm->GetScratchFPRegisterList()->IsEmpty()
1711 ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1712 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1713 }
1714
1715 void CodeGeneratorARM64::MoveLocation(Location destination,
1716 Location source,
1717 DataType::Type dst_type) {
1718 if (source.Equals(destination)) {
1719 return;
1720 }
1721
1722 // A valid move can always be inferred from the destination and source
1723 // locations. When moving from and to a register, the argument type can be
1724 // used to generate 32bit instead of 64bit moves. In debug mode we also
1725 // check the coherency of the locations and the type.
1726 bool unspecified_type = (dst_type == DataType::Type::kVoid);
1727
1728 if (destination.IsRegister() || destination.IsFpuRegister()) {
1729 if (unspecified_type) {
1730 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1731 if (source.IsStackSlot() ||
1732 (src_cst != nullptr && (src_cst->IsIntConstant()
1733 || src_cst->IsFloatConstant()
1734 || src_cst->IsNullConstant()))) {
1735 // For stack slots and 32bit constants, a 64bit type is appropriate.
1736 dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1737 } else {
1738 // If the source is a double stack slot or a 64bit constant, a 64bit
1739 // type is appropriate. Else the source is a register, and since the
1740 // type has not been specified, we chose a 64bit type to force a 64bit
1741 // move.
1742 dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1743 }
1744 }
1745 DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1746 (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1747 CPURegister dst = CPURegisterFrom(destination, dst_type);
1748 if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1749 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1750 __ Ldr(dst, StackOperandFrom(source));
1751 } else if (source.IsSIMDStackSlot()) {
1752 __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1753 } else if (source.IsConstant()) {
1754 DCHECK(CoherentConstantAndType(source, dst_type));
1755 MoveConstant(dst, source.GetConstant());
1756 } else if (source.IsRegister()) {
1757 if (destination.IsRegister()) {
1758 __ Mov(Register(dst), RegisterFrom(source, dst_type));
1759 } else {
1760 DCHECK(destination.IsFpuRegister());
1761 DataType::Type source_type = DataType::Is64BitType(dst_type)
1762 ? DataType::Type::kInt64
1763 : DataType::Type::kInt32;
1764 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1765 }
1766 } else {
1767 DCHECK(source.IsFpuRegister());
1768 if (destination.IsRegister()) {
1769 DataType::Type source_type = DataType::Is64BitType(dst_type)
1770 ? DataType::Type::kFloat64
1771 : DataType::Type::kFloat32;
1772 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1773 } else {
1774 DCHECK(destination.IsFpuRegister());
1775 if (GetGraph()->HasSIMD()) {
1776 __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1777 } else {
1778 __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
1779 }
1780 }
1781 }
1782 } else if (destination.IsSIMDStackSlot()) {
1783 if (source.IsFpuRegister()) {
1784 __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1785 } else {
1786 DCHECK(source.IsSIMDStackSlot());
1787 UseScratchRegisterScope temps(GetVIXLAssembler());
1788 if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
1789 Register temp = temps.AcquireX();
1790 __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1791 __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1792 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1793 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1794 } else {
1795 FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1796 __ Ldr(temp, StackOperandFrom(source));
1797 __ Str(temp, StackOperandFrom(destination));
1798 }
1799 }
1800 } else { // The destination is not a register. It must be a stack slot.
1801 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1802 if (source.IsRegister() || source.IsFpuRegister()) {
1803 if (unspecified_type) {
1804 if (source.IsRegister()) {
1805 dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1806 } else {
1807 dst_type =
1808 destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1809 }
1810 }
1811 DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1812 (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1813 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1814 } else if (source.IsConstant()) {
1815 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1816 << source << " " << dst_type;
1817 UseScratchRegisterScope temps(GetVIXLAssembler());
1818 HConstant* src_cst = source.GetConstant();
1819 CPURegister temp;
1820 if (src_cst->IsZeroBitPattern()) {
1821 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1822 ? Register(xzr)
1823 : Register(wzr);
1824 } else {
1825 if (src_cst->IsIntConstant()) {
1826 temp = temps.AcquireW();
1827 } else if (src_cst->IsLongConstant()) {
1828 temp = temps.AcquireX();
1829 } else if (src_cst->IsFloatConstant()) {
1830 temp = temps.AcquireS();
1831 } else {
1832 DCHECK(src_cst->IsDoubleConstant());
1833 temp = temps.AcquireD();
1834 }
1835 MoveConstant(temp, src_cst);
1836 }
1837 __ Str(temp, StackOperandFrom(destination));
1838 } else {
1839 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1840 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1841 UseScratchRegisterScope temps(GetVIXLAssembler());
1842 // Use any scratch register (a core or a floating-point one)
1843 // from VIXL scratch register pools as a temporary.
1844 //
1845 // We used to only use the FP scratch register pool, but in some
1846 // rare cases the only register from this pool (D31) would
1847 // already be used (e.g. within a ParallelMove instruction, when
1848 // a move is blocked by another move requiring a scratch FP
1849 // register, which would reserve D31). To prevent this issue, we
1850 // ask for a scratch register of any type (core or FP).
1851 //
1852 // Also, we start by asking for a FP scratch register first, as the
1853 // demand of scratch core registers is higher. This is why we
1854 // use AcquireFPOrCoreCPURegisterOfSize instead of
1855 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1856 // allocates core scratch registers first.
1857 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1858 GetVIXLAssembler(),
1859 &temps,
1860 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1861 __ Ldr(temp, StackOperandFrom(source));
1862 __ Str(temp, StackOperandFrom(destination));
1863 }
1864 }
1865 }
1866
1867 void CodeGeneratorARM64::Load(DataType::Type type,
1868 CPURegister dst,
1869 const MemOperand& src) {
1870 switch (type) {
1871 case DataType::Type::kBool:
1872 case DataType::Type::kUint8:
1873 __ Ldrb(Register(dst), src);
1874 break;
1875 case DataType::Type::kInt8:
1876 __ Ldrsb(Register(dst), src);
1877 break;
1878 case DataType::Type::kUint16:
1879 __ Ldrh(Register(dst), src);
1880 break;
1881 case DataType::Type::kInt16:
1882 __ Ldrsh(Register(dst), src);
1883 break;
1884 case DataType::Type::kInt32:
1885 case DataType::Type::kReference:
1886 case DataType::Type::kInt64:
1887 case DataType::Type::kFloat32:
1888 case DataType::Type::kFloat64:
1889 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1890 __ Ldr(dst, src);
1891 break;
1892 case DataType::Type::kUint32:
1893 case DataType::Type::kUint64:
1894 case DataType::Type::kVoid:
1895 LOG(FATAL) << "Unreachable type " << type;
1896 }
1897 }
1898
1899 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1900 CPURegister dst,
1901 const MemOperand& src,
1902 bool needs_null_check) {
1903 MacroAssembler* masm = GetVIXLAssembler();
1904 UseScratchRegisterScope temps(masm);
1905 Register temp_base = temps.AcquireX();
1906 DataType::Type type = instruction->GetType();
1907
1908 DCHECK(!src.IsPreIndex());
1909 DCHECK(!src.IsPostIndex());
1910
1911 // TODO(vixl): Let the MacroAssembler handle MemOperand.
1912 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1913 {
1914 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1915 MemOperand base = MemOperand(temp_base);
1916 switch (type) {
1917 case DataType::Type::kBool:
1918 case DataType::Type::kUint8:
1919 case DataType::Type::kInt8:
1920 {
1921 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1922 __ ldarb(Register(dst), base);
1923 if (needs_null_check) {
1924 MaybeRecordImplicitNullCheck(instruction);
1925 }
1926 }
1927 if (type == DataType::Type::kInt8) {
1928 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1929 }
1930 break;
1931 case DataType::Type::kUint16:
1932 case DataType::Type::kInt16:
1933 {
1934 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1935 __ ldarh(Register(dst), base);
1936 if (needs_null_check) {
1937 MaybeRecordImplicitNullCheck(instruction);
1938 }
1939 }
1940 if (type == DataType::Type::kInt16) {
1941 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1942 }
1943 break;
1944 case DataType::Type::kInt32:
1945 case DataType::Type::kReference:
1946 case DataType::Type::kInt64:
1947 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1948 {
1949 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1950 __ ldar(Register(dst), base);
1951 if (needs_null_check) {
1952 MaybeRecordImplicitNullCheck(instruction);
1953 }
1954 }
1955 break;
1956 case DataType::Type::kFloat32:
1957 case DataType::Type::kFloat64: {
1958 DCHECK(dst.IsFPRegister());
1959 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1960
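// LDAR cannot target an FP register, so load-acquire into a core scratch register of the
// right width and then move the bits across with Fmov.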
1961 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1962 {
1963 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1964 __ ldar(temp, base);
1965 if (needs_null_check) {
1966 MaybeRecordImplicitNullCheck(instruction);
1967 }
1968 }
1969 __ Fmov(FPRegister(dst), temp);
1970 break;
1971 }
1972 case DataType::Type::kUint32:
1973 case DataType::Type::kUint64:
1974 case DataType::Type::kVoid:
1975 LOG(FATAL) << "Unreachable type " << type;
1976 }
1977 }
1978 }
1979
1980 void CodeGeneratorARM64::Store(DataType::Type type,
1981 CPURegister src,
1982 const MemOperand& dst) {
1983 switch (type) {
1984 case DataType::Type::kBool:
1985 case DataType::Type::kUint8:
1986 case DataType::Type::kInt8:
1987 __ Strb(Register(src), dst);
1988 break;
1989 case DataType::Type::kUint16:
1990 case DataType::Type::kInt16:
1991 __ Strh(Register(src), dst);
1992 break;
1993 case DataType::Type::kInt32:
1994 case DataType::Type::kReference:
1995 case DataType::Type::kInt64:
1996 case DataType::Type::kFloat32:
1997 case DataType::Type::kFloat64:
1998 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1999 __ Str(src, dst);
2000 break;
2001 case DataType::Type::kUint32:
2002 case DataType::Type::kUint64:
2003 case DataType::Type::kVoid:
2004 LOG(FATAL) << "Unreachable type " << type;
2005 }
2006 }
2007
2008 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2009 DataType::Type type,
2010 CPURegister src,
2011 const MemOperand& dst,
2012 bool needs_null_check) {
2013 MacroAssembler* masm = GetVIXLAssembler();
2014 UseScratchRegisterScope temps(GetVIXLAssembler());
2015 Register temp_base = temps.AcquireX();
2016
2017 DCHECK(!dst.IsPreIndex());
2018 DCHECK(!dst.IsPostIndex());
2019
2020 // TODO(vixl): Let the MacroAssembler handle this.
2021 Operand op = OperandFromMemOperand(dst);
2022 __ Add(temp_base, dst.GetBaseRegister(), op);
2023 MemOperand base = MemOperand(temp_base);
2024 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2025 switch (type) {
2026 case DataType::Type::kBool:
2027 case DataType::Type::kUint8:
2028 case DataType::Type::kInt8:
2029 {
2030 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2031 __ stlrb(Register(src), base);
2032 if (needs_null_check) {
2033 MaybeRecordImplicitNullCheck(instruction);
2034 }
2035 }
2036 break;
2037 case DataType::Type::kUint16:
2038 case DataType::Type::kInt16:
2039 {
2040 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2041 __ stlrh(Register(src), base);
2042 if (needs_null_check) {
2043 MaybeRecordImplicitNullCheck(instruction);
2044 }
2045 }
2046 break;
2047 case DataType::Type::kInt32:
2048 case DataType::Type::kReference:
2049 case DataType::Type::kInt64:
2050 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
2051 {
2052 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2053 __ stlr(Register(src), base);
2054 if (needs_null_check) {
2055 MaybeRecordImplicitNullCheck(instruction);
2056 }
2057 }
2058 break;
2059 case DataType::Type::kFloat32:
2060 case DataType::Type::kFloat64: {
2061 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
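// STLR cannot take an FP register as its source, so a non-zero FP value is first moved to
// a core scratch register of the same width.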
2062 Register temp_src;
2063 if (src.IsZero()) {
2064 // The zero register is used to avoid synthesizing zero constants.
2065 temp_src = Register(src);
2066 } else {
2067 DCHECK(src.IsFPRegister());
2068 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2069 __ Fmov(temp_src, FPRegister(src));
2070 }
2071 {
2072 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2073 __ stlr(temp_src, base);
2074 if (needs_null_check) {
2075 MaybeRecordImplicitNullCheck(instruction);
2076 }
2077 }
2078 break;
2079 }
2080 case DataType::Type::kUint32:
2081 case DataType::Type::kUint64:
2082 case DataType::Type::kVoid:
2083 LOG(FATAL) << "Unreachable type " << type;
2084 }
2085 }
2086
2087 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2088 HInstruction* instruction,
2089 uint32_t dex_pc,
2090 SlowPathCode* slow_path) {
2091 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2092
2093 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
2094 {
2095 // Ensure the pc position is recorded immediately after the `blr` instruction.
2096 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2097 __ blr(lr);
2098 if (EntrypointRequiresStackMap(entrypoint)) {
2099 RecordPcInfo(instruction, dex_pc, slow_path);
2100 }
2101 }
2102 }
2103
2104 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2105 HInstruction* instruction,
2106 SlowPathCode* slow_path) {
2107 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2108 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2109 __ Blr(lr);
2110 }
2111
2112 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2113 Register class_reg) {
2114 UseScratchRegisterScope temps(GetVIXLAssembler());
2115 Register temp = temps.AcquireW();
2116 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
2117 const size_t status_byte_offset =
2118 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
2119 constexpr uint32_t shifted_initialized_value =
2120 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
2121
2122 // Even if the initialized flag is set, we need to ensure consistent memory ordering.
2123 // TODO(vixl): Let the MacroAssembler handle MemOperand.
2124 __ Add(temp, class_reg, status_byte_offset);
2125 __ Ldarb(temp, HeapOperand(temp));
2126 __ Cmp(temp, shifted_initialized_value);
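// Any status below kInitialized means the class is not yet initialized, so the unsigned
// `lo` branch below takes the slow path for those cases.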
2127 __ B(lo, slow_path->GetEntryLabel());
2128 __ Bind(slow_path->GetExitLabel());
2129 }
2130
2131 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2132 BarrierType type = BarrierAll;
2133
2134 switch (kind) {
2135 case MemBarrierKind::kAnyAny:
2136 case MemBarrierKind::kAnyStore: {
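// kAnyStore must order earlier loads as well as earlier stores against later stores;
// DMB ISHST only orders stores, so a full barrier is used conservatively here.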
2137 type = BarrierAll;
2138 break;
2139 }
2140 case MemBarrierKind::kLoadAny: {
2141 type = BarrierReads;
2142 break;
2143 }
2144 case MemBarrierKind::kStoreStore: {
2145 type = BarrierWrites;
2146 break;
2147 }
2148 default:
2149 LOG(FATAL) << "Unexpected memory barrier " << kind;
2150 }
2151 __ Dmb(InnerShareable, type);
2152 }
2153
2154 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2155 HBasicBlock* successor) {
2156 SuspendCheckSlowPathARM64* slow_path =
2157 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2158 if (slow_path == nullptr) {
2159 slow_path =
2160 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2161 instruction->SetSlowPath(slow_path);
2162 codegen_->AddSlowPath(slow_path);
2163 if (successor != nullptr) {
2164 DCHECK(successor->IsLoopHeader());
2165 }
2166 } else {
2167 DCHECK_EQ(slow_path->GetSuccessor(), successor);
2168 }
2169
2170 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2171 Register temp = temps.AcquireW();
2172
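// Load the 16-bit thread flags; any non-zero value (e.g. a pending suspend or checkpoint
// request) diverts execution to the slow path.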
2173 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2174 if (successor == nullptr) {
2175 __ Cbnz(temp, slow_path->GetEntryLabel());
2176 __ Bind(slow_path->GetReturnLabel());
2177 } else {
2178 __ Cbz(temp, codegen_->GetLabelOf(successor));
2179 __ B(slow_path->GetEntryLabel());
2180 // slow_path will return to GetLabelOf(successor).
2181 }
2182 }
2183
2184 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2185 CodeGeneratorARM64* codegen)
2186 : InstructionCodeGenerator(graph, codegen),
2187 assembler_(codegen->GetAssembler()),
2188 codegen_(codegen) {}
2189
2190 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2191 DCHECK_EQ(instr->InputCount(), 2U);
2192 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2193 DataType::Type type = instr->GetResultType();
2194 switch (type) {
2195 case DataType::Type::kInt32:
2196 case DataType::Type::kInt64:
2197 locations->SetInAt(0, Location::RequiresRegister());
2198 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2199 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2200 break;
2201
2202 case DataType::Type::kFloat32:
2203 case DataType::Type::kFloat64:
2204 locations->SetInAt(0, Location::RequiresFpuRegister());
2205 locations->SetInAt(1, Location::RequiresFpuRegister());
2206 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2207 break;
2208
2209 default:
2210 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2211 }
2212 }
2213
2214 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2215 const FieldInfo& field_info) {
2216 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2217
2218 bool object_field_get_with_read_barrier =
2219 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2220 LocationSummary* locations =
2221 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2222 object_field_get_with_read_barrier
2223 ? LocationSummary::kCallOnSlowPath
2224 : LocationSummary::kNoCall);
2225 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2226 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2227 // We need a temporary register for the read barrier marking slow
2228 // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
2229 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2230 !Runtime::Current()->UseJitCompilation() &&
2231 !field_info.IsVolatile()) {
2232 // If link-time thunks for the Baker read barrier are enabled, for AOT
2233 // non-volatile loads we need a temporary only if the offset is too big.
2234 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2235 locations->AddTemp(FixedTempLocation());
2236 }
2237 } else {
2238 locations->AddTemp(Location::RequiresRegister());
2239 }
2240 }
2241 locations->SetInAt(0, Location::RequiresRegister());
2242 if (DataType::IsFloatingPointType(instruction->GetType())) {
2243 locations->SetOut(Location::RequiresFpuRegister());
2244 } else {
2245 // The output overlaps for an object field get when read barriers
2246 // are enabled: we do not want the load to overwrite the object's
2247 // location, as we need it to emit the read barrier.
2248 locations->SetOut(
2249 Location::RequiresRegister(),
2250 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2251 }
2252 }
2253
2254 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2255 const FieldInfo& field_info) {
2256 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2257 LocationSummary* locations = instruction->GetLocations();
2258 Location base_loc = locations->InAt(0);
2259 Location out = locations->Out();
2260 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2261 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2262 DataType::Type load_type = instruction->GetType();
2263 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2264
2265 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
2266 load_type == DataType::Type::kReference) {
2267 // Object FieldGet with Baker's read barrier case.
2268 // /* HeapReference<Object> */ out = *(base + offset)
2269 Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2270 Location maybe_temp =
2271 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2272 // Note that potential implicit null checks are handled in this
2273 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2274 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2275 instruction,
2276 out,
2277 base,
2278 offset,
2279 maybe_temp,
2280 /* needs_null_check */ true,
2281 field_info.IsVolatile());
2282 } else {
2283 // General case.
2284 if (field_info.IsVolatile()) {
2285 // Note that a potential implicit null check is handled in this
2286 // CodeGeneratorARM64::LoadAcquire call.
2287 // NB: LoadAcquire will record the pc info if needed.
2288 codegen_->LoadAcquire(
2289 instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
2290 } else {
2291 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2292 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2293 codegen_->Load(load_type, OutputCPURegister(instruction), field);
2294 codegen_->MaybeRecordImplicitNullCheck(instruction);
2295 }
2296 if (load_type == DataType::Type::kReference) {
2297 // If read barriers are enabled, emit read barriers other than
2298 // Baker's using a slow path (and also unpoison the loaded
2299 // reference, if heap poisoning is enabled).
2300 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2301 }
2302 }
2303 }
2304
2305 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2306 LocationSummary* locations =
2307 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2308 locations->SetInAt(0, Location::RequiresRegister());
2309 if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2310 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2311 } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2312 locations->SetInAt(1, Location::RequiresFpuRegister());
2313 } else {
2314 locations->SetInAt(1, Location::RequiresRegister());
2315 }
2316 }
2317
2318 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2319 const FieldInfo& field_info,
2320 bool value_can_be_null) {
2321 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2322
2323 Register obj = InputRegisterAt(instruction, 0);
2324 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2325 CPURegister source = value;
2326 Offset offset = field_info.GetFieldOffset();
2327 DataType::Type field_type = field_info.GetFieldType();
2328
2329 {
2330 // We use a block to end the scratch scope before the write barrier, thus
2331 // freeing the temporary registers so they can be used in `MarkGCCard`.
2332 UseScratchRegisterScope temps(GetVIXLAssembler());
2333
2334 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2335 DCHECK(value.IsW());
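// Poison a scratch copy of the reference rather than the input register itself, since the
// unpoisoned value may still be needed afterwards (e.g. for the card mark below).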
2336 Register temp = temps.AcquireW();
2337 __ Mov(temp, value.W());
2338 GetAssembler()->PoisonHeapReference(temp.W());
2339 source = temp;
2340 }
2341
2342 if (field_info.IsVolatile()) {
2343 codegen_->StoreRelease(
2344 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
2345 } else {
2346 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2347 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2348 codegen_->Store(field_type, source, HeapOperand(obj, offset));
2349 codegen_->MaybeRecordImplicitNullCheck(instruction);
2350 }
2351 }
2352
2353 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2354 codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2355 }
2356 }
2357
2358 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2359 DataType::Type type = instr->GetType();
2360
2361 switch (type) {
2362 case DataType::Type::kInt32:
2363 case DataType::Type::kInt64: {
2364 Register dst = OutputRegister(instr);
2365 Register lhs = InputRegisterAt(instr, 0);
2366 Operand rhs = InputOperandAt(instr, 1);
2367 if (instr->IsAdd()) {
2368 __ Add(dst, lhs, rhs);
2369 } else if (instr->IsAnd()) {
2370 __ And(dst, lhs, rhs);
2371 } else if (instr->IsOr()) {
2372 __ Orr(dst, lhs, rhs);
2373 } else if (instr->IsSub()) {
2374 __ Sub(dst, lhs, rhs);
2375 } else if (instr->IsRor()) {
2376 if (rhs.IsImmediate()) {
2377 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2378 __ Ror(dst, lhs, shift);
2379 } else {
2380 // Ensure the shift distance is in a register of the same size as the result. If
2381 // we are rotating a long and the shift comes in a W register originally, we
2382 // don't need to sign-extend it (sxtw) for use as an X register, since shift
2383 // distances are always taken modulo the register width (& (reg_bits - 1)).
2384 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2385 }
2386 } else {
2387 DCHECK(instr->IsXor());
2388 __ Eor(dst, lhs, rhs);
2389 }
2390 break;
2391 }
2392 case DataType::Type::kFloat32:
2393 case DataType::Type::kFloat64: {
2394 FPRegister dst = OutputFPRegister(instr);
2395 FPRegister lhs = InputFPRegisterAt(instr, 0);
2396 FPRegister rhs = InputFPRegisterAt(instr, 1);
2397 if (instr->IsAdd()) {
2398 __ Fadd(dst, lhs, rhs);
2399 } else if (instr->IsSub()) {
2400 __ Fsub(dst, lhs, rhs);
2401 } else {
2402 LOG(FATAL) << "Unexpected floating-point binary operation";
2403 }
2404 break;
2405 }
2406 default:
2407 LOG(FATAL) << "Unexpected binary operation type " << type;
2408 }
2409 }
2410
2411 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2412 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2413
2414 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2415 DataType::Type type = instr->GetResultType();
2416 switch (type) {
2417 case DataType::Type::kInt32:
2418 case DataType::Type::kInt64: {
2419 locations->SetInAt(0, Location::RequiresRegister());
2420 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2421 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2422 break;
2423 }
2424 default:
2425 LOG(FATAL) << "Unexpected shift type " << type;
2426 }
2427 }
2428
2429 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2430 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2431
2432 DataType::Type type = instr->GetType();
2433 switch (type) {
2434 case DataType::Type::kInt32:
2435 case DataType::Type::kInt64: {
2436 Register dst = OutputRegister(instr);
2437 Register lhs = InputRegisterAt(instr, 0);
2438 Operand rhs = InputOperandAt(instr, 1);
2439 if (rhs.IsImmediate()) {
2440 uint32_t shift_value = rhs.GetImmediate() &
2441 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2442 if (instr->IsShl()) {
2443 __ Lsl(dst, lhs, shift_value);
2444 } else if (instr->IsShr()) {
2445 __ Asr(dst, lhs, shift_value);
2446 } else {
2447 __ Lsr(dst, lhs, shift_value);
2448 }
2449 } else {
2450 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
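// Variable shifts on AArch64 implicitly take the shift amount modulo the register width,
// so no explicit masking of the shift distance is needed here.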
2451
2452 if (instr->IsShl()) {
2453 __ Lsl(dst, lhs, rhs_reg);
2454 } else if (instr->IsShr()) {
2455 __ Asr(dst, lhs, rhs_reg);
2456 } else {
2457 __ Lsr(dst, lhs, rhs_reg);
2458 }
2459 }
2460 break;
2461 }
2462 default:
2463 LOG(FATAL) << "Unexpected shift operation type " << type;
2464 }
2465 }
2466
2467 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2468 HandleBinaryOp(instruction);
2469 }
2470
2471 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2472 HandleBinaryOp(instruction);
2473 }
2474
2475 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2476 HandleBinaryOp(instruction);
2477 }
2478
2479 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2480 HandleBinaryOp(instruction);
2481 }
2482
2483 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2484 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2485 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2486 locations->SetInAt(0, Location::RequiresRegister());
2487 // There is no immediate variant of negated bitwise instructions in AArch64.
2488 locations->SetInAt(1, Location::RequiresRegister());
2489 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2490 }
2491
2492 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2493 Register dst = OutputRegister(instr);
2494 Register lhs = InputRegisterAt(instr, 0);
2495 Register rhs = InputRegisterAt(instr, 1);
2496
2497 switch (instr->GetOpKind()) {
2498 case HInstruction::kAnd:
2499 __ Bic(dst, lhs, rhs);
2500 break;
2501 case HInstruction::kOr:
2502 __ Orn(dst, lhs, rhs);
2503 break;
2504 case HInstruction::kXor:
2505 __ Eon(dst, lhs, rhs);
2506 break;
2507 default:
2508 LOG(FATAL) << "Unreachable";
2509 }
2510 }
2511
2512 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2513 HDataProcWithShifterOp* instruction) {
2514 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2515 instruction->GetType() == DataType::Type::kInt64);
2516 LocationSummary* locations =
2517 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2518 if (instruction->GetInstrKind() == HInstruction::kNeg) {
2519 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2520 } else {
2521 locations->SetInAt(0, Location::RequiresRegister());
2522 }
2523 locations->SetInAt(1, Location::RequiresRegister());
2524 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2525 }
2526
2527 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2528 HDataProcWithShifterOp* instruction) {
2529 DataType::Type type = instruction->GetType();
2530 HInstruction::InstructionKind kind = instruction->GetInstrKind();
2531 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2532 Register out = OutputRegister(instruction);
2533 Register left;
2534 if (kind != HInstruction::kNeg) {
2535 left = InputRegisterAt(instruction, 0);
2536 }
2537 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2538 // shifter operand operation, the IR generating `right_reg` (input to the type
2539 // conversion) can have a different type from the current instruction's type,
2540 // so we manually indicate the type.
2541 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2542 Operand right_operand(0);
2543
2544 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2545 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2546 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2547 } else {
2548 right_operand = Operand(right_reg,
2549 helpers::ShiftFromOpKind(op_kind),
2550 instruction->GetShiftAmount());
2551 }
2552
2553 // Logical binary operations do not support extension operations in the
2554 // operand. Note that VIXL would still cope if one were passed, by generating
2555 // the extension as a separate instruction.
2556 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2557 DCHECK(!right_operand.IsExtendedRegister() ||
2558 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2559 kind != HInstruction::kNeg));
2560 switch (kind) {
2561 case HInstruction::kAdd:
2562 __ Add(out, left, right_operand);
2563 break;
2564 case HInstruction::kAnd:
2565 __ And(out, left, right_operand);
2566 break;
2567 case HInstruction::kNeg:
2568 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2569 __ Neg(out, right_operand);
2570 break;
2571 case HInstruction::kOr:
2572 __ Orr(out, left, right_operand);
2573 break;
2574 case HInstruction::kSub:
2575 __ Sub(out, left, right_operand);
2576 break;
2577 case HInstruction::kXor:
2578 __ Eor(out, left, right_operand);
2579 break;
2580 default:
2581 LOG(FATAL) << "Unexpected operation kind: " << kind;
2582 UNREACHABLE();
2583 }
2584 }
2585
2586 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2587 LocationSummary* locations =
2588 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2589 locations->SetInAt(0, Location::RequiresRegister());
2590 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2591 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2592 }
2593
2594 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2595 __ Add(OutputRegister(instruction),
2596 InputRegisterAt(instruction, 0),
2597 Operand(InputOperandAt(instruction, 1)));
2598 }
2599
2600 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2601 LocationSummary* locations =
2602 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2603
2604 HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2605
2606 locations->SetInAt(0, Location::RequiresRegister());
2607 // For the byte case we don't need to shift the index variable, so we can encode the data offset
2608 // into the ADD instruction. For other cases we prefer the data_offset to be in a register; that
2609 // hoists the data offset constant generation out of the loop and reduces the critical path
2610 // length in the loop.
2611 locations->SetInAt(1, shift->GetValue() == 0
2612 ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2613 : Location::RequiresRegister());
2614 locations->SetInAt(2, Location::ConstantLocation(shift));
2615 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2616 }
2617
2618 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2619 HIntermediateAddressIndex* instruction) {
2620 Register index_reg = InputRegisterAt(instruction, 0);
2621 uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
2622 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2623
2624 if (shift == 0) {
2625 __ Add(OutputRegister(instruction), index_reg, offset);
2626 } else {
2627 Register offset_reg = InputRegisterAt(instruction, 1);
2628 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2629 }
2630 }
2631
2632 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2633 LocationSummary* locations =
2634 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2635 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2636 if (instr->GetOpKind() == HInstruction::kSub &&
2637 accumulator->IsConstant() &&
2638 accumulator->AsConstant()->IsArithmeticZero()) {
2639 // Don't allocate a register for the Mneg instruction.
2640 } else {
2641 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2642 Location::RequiresRegister());
2643 }
2644 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2645 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2646 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2647 }
2648
2649 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2650 Register res = OutputRegister(instr);
2651 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2652 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2653
2654 // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2655 // This fixup should be carried out for all multiply-accumulate instructions:
2656 // madd, msub, smaddl, smsubl, umaddl and umsubl.
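// The erratum can cause an incorrect result when a 64-bit multiply-accumulate
// immediately follows a memory load or store; to break that sequence, a single
// nop is emitted below whenever the previous instruction is a load or store.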
2657 if (instr->GetType() == DataType::Type::kInt64 &&
2658 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2659 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2660 vixl::aarch64::Instruction* prev =
2661 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2662 if (prev->IsLoadOrStore()) {
2663 // Make sure we emit exactly one nop.
2664 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2665 __ nop();
2666 }
2667 }
2668
2669 if (instr->GetOpKind() == HInstruction::kAdd) {
2670 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2671 __ Madd(res, mul_left, mul_right, accumulator);
2672 } else {
2673 DCHECK(instr->GetOpKind() == HInstruction::kSub);
2674 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2675 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2676 __ Mneg(res, mul_left, mul_right);
2677 } else {
2678 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2679 __ Msub(res, mul_left, mul_right, accumulator);
2680 }
2681 }
2682 }
2683
2684 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2685 bool object_array_get_with_read_barrier =
2686 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2687 LocationSummary* locations =
2688 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2689 object_array_get_with_read_barrier
2690 ? LocationSummary::kCallOnSlowPath
2691 : LocationSummary::kNoCall);
2692 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2693 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2694 // We need a temporary register for the read barrier marking slow
2695 // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
2696 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2697 !Runtime::Current()->UseJitCompilation() &&
2698 instruction->GetIndex()->IsConstant()) {
2699 // Array loads with constant index are treated as field loads.
2700 // If link-time thunks for the Baker read barrier are enabled, for AOT
2701 // constant index loads we need a temporary only if the offset is too big.
2702 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2703 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2704 offset += index << DataType::SizeShift(DataType::Type::kReference);
2705 if (offset >= kReferenceLoadMinFarOffset) {
2706 locations->AddTemp(FixedTempLocation());
2707 }
2708 } else {
2709 locations->AddTemp(Location::RequiresRegister());
2710 }
2711 }
2712 locations->SetInAt(0, Location::RequiresRegister());
2713 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2714 if (DataType::IsFloatingPointType(instruction->GetType())) {
2715 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2716 } else {
2717 // The output overlaps in the case of an object array get with
2718 // read barriers enabled: we do not want the move to overwrite the
2719 // array's location, as we need it to emit the read barrier.
2720 locations->SetOut(
2721 Location::RequiresRegister(),
2722 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2723 }
2724 }
2725
2726 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2727 DataType::Type type = instruction->GetType();
2728 Register obj = InputRegisterAt(instruction, 0);
2729 LocationSummary* locations = instruction->GetLocations();
2730 Location index = locations->InAt(1);
2731 Location out = locations->Out();
2732 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2733 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2734 instruction->IsStringCharAt();
2735 MacroAssembler* masm = GetVIXLAssembler();
2736 UseScratchRegisterScope temps(masm);
2737
2738 // The read barrier instrumentation of object ArrayGet instructions
2739 // does not support the HIntermediateAddress instruction.
2740 DCHECK(!((type == DataType::Type::kReference) &&
2741 instruction->GetArray()->IsIntermediateAddress() &&
2742 kEmitCompilerReadBarrier));
2743
2744 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2745 // Object ArrayGet with Baker's read barrier case.
2746 // Note that a potential implicit null check is handled in the
2747 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2748 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2749 if (index.IsConstant()) {
2750 // Array load with a constant index can be treated as a field load.
2751 offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
2752 Location maybe_temp =
2753 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2754 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2755 out,
2756 obj.W(),
2757 offset,
2758 maybe_temp,
2759 /* needs_null_check */ false,
2760 /* use_load_acquire */ false);
2761 } else {
2762 Register temp = WRegisterFrom(locations->GetTemp(0));
2763 codegen_->GenerateArrayLoadWithBakerReadBarrier(
2764 instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
2765 }
2766 } else {
2767 // General case.
2768 MemOperand source = HeapOperand(obj);
2769 Register length;
2770 if (maybe_compressed_char_at) {
2771 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2772 length = temps.AcquireW();
2773 {
2774 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2775 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2776
2777 if (instruction->GetArray()->IsIntermediateAddress()) {
2778 DCHECK_LT(count_offset, offset);
2779 int64_t adjusted_offset =
2780 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2781 // Note that `adjusted_offset` is negative, so this will be a LDUR.
2782 __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2783 } else {
2784 __ Ldr(length, HeapOperand(obj, count_offset));
2785 }
2786 codegen_->MaybeRecordImplicitNullCheck(instruction);
2787 }
2788 }
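// The String `count` field holds the character count shifted left by one with the
// compression flag in bit 0 (0 = compressed, 8-bit chars; 1 = uncompressed, 16-bit
// chars), so the code below tests bit 0 of `length` and uses Ldrb or Ldrh accordingly.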
2789 if (index.IsConstant()) {
2790 if (maybe_compressed_char_at) {
2791 vixl::aarch64::Label uncompressed_load, done;
2792 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2793 "Expecting 0=compressed, 1=uncompressed");
2794 __ Tbnz(length.W(), 0, &uncompressed_load);
2795 __ Ldrb(Register(OutputCPURegister(instruction)),
2796 HeapOperand(obj, offset + Int64ConstantFrom(index)));
2797 __ B(&done);
2798 __ Bind(&uncompressed_load);
2799 __ Ldrh(Register(OutputCPURegister(instruction)),
2800 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
2801 __ Bind(&done);
2802 } else {
2803 offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
2804 source = HeapOperand(obj, offset);
2805 }
2806 } else {
2807 Register temp = temps.AcquireSameSizeAs(obj);
2808 if (instruction->GetArray()->IsIntermediateAddress()) {
2809 // We do not need to compute the intermediate address from the array: the
2810 // input instruction has done it already. See the comment in
2811 // `TryExtractArrayAccessAddress()`.
2812 if (kIsDebugBuild) {
2813 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2814 DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2815 }
2816 temp = obj;
2817 } else {
2818 __ Add(temp, obj, offset);
2819 }
2820 if (maybe_compressed_char_at) {
2821 vixl::aarch64::Label uncompressed_load, done;
2822 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2823 "Expecting 0=compressed, 1=uncompressed");
2824 __ Tbnz(length.W(), 0, &uncompressed_load);
2825 __ Ldrb(Register(OutputCPURegister(instruction)),
2826 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2827 __ B(&done);
2828 __ Bind(&uncompressed_load);
2829 __ Ldrh(Register(OutputCPURegister(instruction)),
2830 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2831 __ Bind(&done);
2832 } else {
2833 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2834 }
2835 }
2836 if (!maybe_compressed_char_at) {
2837 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2838 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2839 codegen_->Load(type, OutputCPURegister(instruction), source);
2840 codegen_->MaybeRecordImplicitNullCheck(instruction);
2841 }
2842
2843 if (type == DataType::Type::kReference) {
2844 static_assert(
2845 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2846 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2847 Location obj_loc = locations->InAt(0);
2848 if (index.IsConstant()) {
2849 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2850 } else {
2851 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2852 }
2853 }
2854 }
2855 }
2856
2857 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2858 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2859 locations->SetInAt(0, Location::RequiresRegister());
2860 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2861 }
2862
2863 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2864 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2865 vixl::aarch64::Register out = OutputRegister(instruction);
2866 {
2867 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2868 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2869 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2870 codegen_->MaybeRecordImplicitNullCheck(instruction);
2871 }
2872 // Mask out compression flag from String's array length.
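// (The count field is stored as (length << 1) | compression_flag, so a logical
// shift right by one recovers the actual length.)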
2873 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2874 __ Lsr(out.W(), out.W(), 1u);
2875 }
2876 }
2877
2878 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2879 DataType::Type value_type = instruction->GetComponentType();
2880
2881 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2882 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2883 instruction,
2884 may_need_runtime_call_for_type_check ?
2885 LocationSummary::kCallOnSlowPath :
2886 LocationSummary::kNoCall);
2887 locations->SetInAt(0, Location::RequiresRegister());
2888 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2889 if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2890 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2891 } else if (DataType::IsFloatingPointType(value_type)) {
2892 locations->SetInAt(2, Location::RequiresFpuRegister());
2893 } else {
2894 locations->SetInAt(2, Location::RequiresRegister());
2895 }
2896 }
2897
2898 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2899 DataType::Type value_type = instruction->GetComponentType();
2900 LocationSummary* locations = instruction->GetLocations();
2901 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2902 bool needs_write_barrier =
2903 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2904
2905 Register array = InputRegisterAt(instruction, 0);
2906 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2907 CPURegister source = value;
2908 Location index = locations->InAt(1);
2909 size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2910 MemOperand destination = HeapOperand(array);
2911 MacroAssembler* masm = GetVIXLAssembler();
2912
2913 if (!needs_write_barrier) {
2914 DCHECK(!may_need_runtime_call_for_type_check);
2915 if (index.IsConstant()) {
2916 offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
2917 destination = HeapOperand(array, offset);
2918 } else {
2919 UseScratchRegisterScope temps(masm);
2920 Register temp = temps.AcquireSameSizeAs(array);
2921 if (instruction->GetArray()->IsIntermediateAddress()) {
2922 // We do not need to compute the intermediate address from the array: the
2923 // input instruction has done it already. See the comment in
2924 // `TryExtractArrayAccessAddress()`.
2925 if (kIsDebugBuild) {
2926 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2927 DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2928 }
2929 temp = array;
2930 } else {
2931 __ Add(temp, array, offset);
2932 }
2933 destination = HeapOperand(temp,
2934 XRegisterFrom(index),
2935 LSL,
2936 DataType::SizeShift(value_type));
2937 }
2938 {
2939 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2940 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2941 codegen_->Store(value_type, value, destination);
2942 codegen_->MaybeRecordImplicitNullCheck(instruction);
2943 }
2944 } else {
2945 DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2946 vixl::aarch64::Label done;
2947 SlowPathCodeARM64* slow_path = nullptr;
2948 {
2949 // We use a block to end the scratch scope before the write barrier, thus
2950 // freeing the temporary registers so they can be used in `MarkGCCard`.
2951 UseScratchRegisterScope temps(masm);
2952 Register temp = temps.AcquireSameSizeAs(array);
2953 if (index.IsConstant()) {
2954 offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
2955 destination = HeapOperand(array, offset);
2956 } else {
2957 destination = HeapOperand(temp,
2958 XRegisterFrom(index),
2959 LSL,
2960 DataType::SizeShift(value_type));
2961 }
2962
2963 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2964 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2965 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2966
2967 if (may_need_runtime_call_for_type_check) {
2968 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2969 codegen_->AddSlowPath(slow_path);
2970 if (instruction->GetValueCanBeNull()) {
2971 vixl::aarch64::Label non_zero;
2972 __ Cbnz(Register(value), &non_zero);
2973 if (!index.IsConstant()) {
2974 __ Add(temp, array, offset);
2975 }
2976 {
2977 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2978 // emitted.
2979 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2980 __ Str(wzr, destination);
2981 codegen_->MaybeRecordImplicitNullCheck(instruction);
2982 }
2983 __ B(&done);
2984 __ Bind(&non_zero);
2985 }
2986
2987 // Note that when Baker read barriers are enabled, the type
2988 // checks are performed without read barriers. This is fine,
2989 // even in the case where a class object is in the from-space
2990 // after the flip, as a comparison involving such a type would
2991 // not produce a false positive; it may of course produce a
2992 // false negative, in which case we would take the ArraySet
2993 // slow path.
2994
2995 Register temp2 = temps.AcquireSameSizeAs(array);
2996 // /* HeapReference<Class> */ temp = array->klass_
2997 {
2998 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2999 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3000 __ Ldr(temp, HeapOperand(array, class_offset));
3001 codegen_->MaybeRecordImplicitNullCheck(instruction);
3002 }
3003 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3004
3005 // /* HeapReference<Class> */ temp = temp->component_type_
3006 __ Ldr(temp, HeapOperand(temp, component_offset));
3007 // /* HeapReference<Class> */ temp2 = value->klass_
3008 __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3009 // If heap poisoning is enabled, no need to unpoison `temp`
3010 // nor `temp2`, as we are comparing two poisoned references.
3011 __ Cmp(temp, temp2);
3012 temps.Release(temp2);
3013
3014 if (instruction->StaticTypeOfArrayIsObjectArray()) {
3015 vixl::aarch64::Label do_put;
3016 __ B(eq, &do_put);
3017 // If heap poisoning is enabled, the `temp` reference has
3018 // not been unpoisoned yet; unpoison it now.
3019 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3020
3021 // /* HeapReference<Class> */ temp = temp->super_class_
3022 __ Ldr(temp, HeapOperand(temp, super_offset));
3023 // If heap poisoning is enabled, no need to unpoison
3024 // `temp`, as we are comparing against null below.
3025 __ Cbnz(temp, slow_path->GetEntryLabel());
3026 __ Bind(&do_put);
3027 } else {
3028 __ B(ne, slow_path->GetEntryLabel());
3029 }
3030 }
3031
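// Poison the reference in a temporary so that `value` itself remains unpoisoned
// for the card marking performed after the store.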
3032 if (kPoisonHeapReferences) {
3033 Register temp2 = temps.AcquireSameSizeAs(array);
3034 DCHECK(value.IsW());
3035 __ Mov(temp2, value.W());
3036 GetAssembler()->PoisonHeapReference(temp2);
3037 source = temp2;
3038 }
3039
3040 if (!index.IsConstant()) {
3041 __ Add(temp, array, offset);
3042 } else {
3043 // We no longer need `temp` here, so release it, as the store below may
3044 // need a scratch register (if the constant index makes the offset too large)
3045 // and the poisoned `source` could be using the other scratch register.
3046 temps.Release(temp);
3047 }
3048 {
3049 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3050 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3051 __ Str(source, destination);
3052
3053 if (!may_need_runtime_call_for_type_check) {
3054 codegen_->MaybeRecordImplicitNullCheck(instruction);
3055 }
3056 }
3057 }
3058
3059 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
3060
3061 if (done.IsLinked()) {
3062 __ Bind(&done);
3063 }
3064
3065 if (slow_path != nullptr) {
3066 __ Bind(slow_path->GetExitLabel());
3067 }
3068 }
3069 }
3070
3071 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3072 RegisterSet caller_saves = RegisterSet::Empty();
3073 InvokeRuntimeCallingConvention calling_convention;
3074 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3075 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3076 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3077 locations->SetInAt(0, Location::RequiresRegister());
3078 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3079 }
3080
3081 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3082 BoundsCheckSlowPathARM64* slow_path =
3083 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3084 codegen_->AddSlowPath(slow_path);
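// A single unsigned comparison covers both bounds: a negative index wraps to a
// large unsigned value, so branching on `hs` (unsigned >=) also catches it.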
3085 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
3086 __ B(slow_path->GetEntryLabel(), hs);
3087 }
3088
3089 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3090 LocationSummary* locations =
3091 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3092 locations->SetInAt(0, Location::RequiresRegister());
3093 if (check->HasUses()) {
3094 locations->SetOut(Location::SameAsFirstInput());
3095 }
3096 }
3097
3098 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3099 // We assume the class is not null.
3100 SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
3101 check->GetLoadClass(), check, check->GetDexPc(), true);
3102 codegen_->AddSlowPath(slow_path);
3103 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3104 }
3105
3106 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3107 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3108 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3109 }
3110
3111 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3112 FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3113 Location rhs_loc = instruction->GetLocations()->InAt(1);
3114 if (rhs_loc.IsConstant()) {
3115 // 0.0 is the only immediate that can be encoded directly in
3116 // an FCMP instruction.
3117 //
3118 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3119 // specify that in a floating-point comparison, positive zero
3120 // and negative zero are considered equal, so we can use the
3121 // literal 0.0 for both cases here.
3122 //
3123 // Note however that some methods (Float.equal, Float.compare,
3124 // Float.compareTo, Double.equal, Double.compare,
3125 // Double.compareTo, Math.max, Math.min, StrictMath.max,
3126 // StrictMath.min) consider 0.0 to be (strictly) greater than
3127 // -0.0. So if we ever translate calls to these methods into a
3128 // HCompare instruction, we must handle the -0.0 case with
3129 // care here.
3130 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3131 __ Fcmp(lhs_reg, 0.0);
3132 } else {
3133 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3134 }
3135 }
3136
3137 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3138 LocationSummary* locations =
3139 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3140 DataType::Type in_type = compare->InputAt(0)->GetType();
3141 switch (in_type) {
3142 case DataType::Type::kBool:
3143 case DataType::Type::kUint8:
3144 case DataType::Type::kInt8:
3145 case DataType::Type::kUint16:
3146 case DataType::Type::kInt16:
3147 case DataType::Type::kInt32:
3148 case DataType::Type::kInt64: {
3149 locations->SetInAt(0, Location::RequiresRegister());
3150 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3151 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3152 break;
3153 }
3154 case DataType::Type::kFloat32:
3155 case DataType::Type::kFloat64: {
3156 locations->SetInAt(0, Location::RequiresFpuRegister());
3157 locations->SetInAt(1,
3158 IsFloatingPointZeroConstant(compare->InputAt(1))
3159 ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
3160 : Location::RequiresFpuRegister());
3161 locations->SetOut(Location::RequiresRegister());
3162 break;
3163 }
3164 default:
3165 LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3166 }
3167 }
3168
3169 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3170 DataType::Type in_type = compare->InputAt(0)->GetType();
3171
3172 // 0 if: left == right
3173 // 1 if: left > right
3174 // -1 if: left < right
3175 switch (in_type) {
3176 case DataType::Type::kBool:
3177 case DataType::Type::kUint8:
3178 case DataType::Type::kInt8:
3179 case DataType::Type::kUint16:
3180 case DataType::Type::kInt16:
3181 case DataType::Type::kInt32:
3182 case DataType::Type::kInt64: {
3183 Register result = OutputRegister(compare);
3184 Register left = InputRegisterAt(compare, 0);
3185 Operand right = InputOperandAt(compare, 1);
3186 __ Cmp(left, right);
3187 __ Cset(result, ne); // result == +1 if NE or 0 otherwise
3188 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise
3189 break;
3190 }
3191 case DataType::Type::kFloat32:
3192 case DataType::Type::kFloat64: {
3193 Register result = OutputRegister(compare);
3194 GenerateFcmp(compare);
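// Same pattern as the integer case: set the result to 1 when the operands are not
// equal, then negate it to -1 when the "less than" condition holds, using the FP
// condition that encodes the gt/lt bias chosen for NaN comparisons.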
3195 __ Cset(result, ne);
3196 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3197 break;
3198 }
3199 default:
3200 LOG(FATAL) << "Unimplemented compare type " << in_type;
3201 }
3202 }
3203
3204 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3205 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3206
3207 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3208 locations->SetInAt(0, Location::RequiresFpuRegister());
3209 locations->SetInAt(1,
3210 IsFloatingPointZeroConstant(instruction->InputAt(1))
3211 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3212 : Location::RequiresFpuRegister());
3213 } else {
3214 // Integer cases.
3215 locations->SetInAt(0, Location::RequiresRegister());
3216 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3217 }
3218
3219 if (!instruction->IsEmittedAtUseSite()) {
3220 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3221 }
3222 }
3223
3224 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3225 if (instruction->IsEmittedAtUseSite()) {
3226 return;
3227 }
3228
3229 LocationSummary* locations = instruction->GetLocations();
3230 Register res = RegisterFrom(locations->Out(), instruction->GetType());
3231 IfCondition if_cond = instruction->GetCondition();
3232
3233 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3234 GenerateFcmp(instruction);
3235 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3236 } else {
3237 // Integer cases.
3238 Register lhs = InputRegisterAt(instruction, 0);
3239 Operand rhs = InputOperandAt(instruction, 1);
3240 __ Cmp(lhs, rhs);
3241 __ Cset(res, ARM64Condition(if_cond));
3242 }
3243 }
3244
3245 #define FOR_EACH_CONDITION_INSTRUCTION(M) \
3246 M(Equal) \
3247 M(NotEqual) \
3248 M(LessThan) \
3249 M(LessThanOrEqual) \
3250 M(GreaterThan) \
3251 M(GreaterThanOrEqual) \
3252 M(Below) \
3253 M(BelowOrEqual) \
3254 M(Above) \
3255 M(AboveOrEqual)
3256 #define DEFINE_CONDITION_VISITORS(Name) \
3257 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \
3258 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3259 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3260 #undef DEFINE_CONDITION_VISITORS
3261 #undef FOR_EACH_CONDITION_INSTRUCTION
3262
3263 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3264 DCHECK(instruction->IsDiv() || instruction->IsRem());
3265
3266 LocationSummary* locations = instruction->GetLocations();
3267 Location second = locations->InAt(1);
3268 DCHECK(second.IsConstant());
3269
3270 Register out = OutputRegister(instruction);
3271 Register dividend = InputRegisterAt(instruction, 0);
3272 int64_t imm = Int64FromConstant(second.GetConstant());
3273 DCHECK(imm == 1 || imm == -1);
3274
3275 if (instruction->IsRem()) {
3276 __ Mov(out, 0);
3277 } else {
3278 if (imm == 1) {
3279 __ Mov(out, dividend);
3280 } else {
3281 __ Neg(out, dividend);
3282 }
3283 }
3284 }
3285
3286 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
3287 DCHECK(instruction->IsDiv() || instruction->IsRem());
3288
3289 LocationSummary* locations = instruction->GetLocations();
3290 Location second = locations->InAt(1);
3291 DCHECK(second.IsConstant());
3292
3293 Register out = OutputRegister(instruction);
3294 Register dividend = InputRegisterAt(instruction, 0);
3295 int64_t imm = Int64FromConstant(second.GetConstant());
3296 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3297 int ctz_imm = CTZ(abs_imm);
3298
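// A minimal sketch of the sequences emitted below, in C-like terms (illustrative only):
//   Div: temp = dividend + (abs_imm - 1);
//        out  = (dividend < 0) ? temp : dividend;  // bias negative dividends to round toward zero
//        out  = out >> ctz_imm;                    // arithmetic shift
//        if (imm < 0) out = -out;
//        e.g. imm = 8, dividend = -5: temp = 2, out = 2 >> 3 = 0, matching -5 / 8 == 0.
//   Rem: bias = (dividend < 0) ? abs_imm - 1 : 0;
//        out  = ((dividend + bias) & (abs_imm - 1)) - bias;  // remainder keeps the dividend's sign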
3299 UseScratchRegisterScope temps(GetVIXLAssembler());
3300 Register temp = temps.AcquireSameSizeAs(out);
3301
3302 if (instruction->IsDiv()) {
3303 __ Add(temp, dividend, abs_imm - 1);
3304 __ Cmp(dividend, 0);
3305 __ Csel(out, temp, dividend, lt);
3306 if (imm > 0) {
3307 __ Asr(out, out, ctz_imm);
3308 } else {
3309 __ Neg(out, Operand(out, ASR, ctz_imm));
3310 }
3311 } else {
3312 int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 32 : 64;
3313 __ Asr(temp, dividend, bits - 1);
3314 __ Lsr(temp, temp, bits - ctz_imm);
3315 __ Add(out, dividend, temp);
3316 __ And(out, out, abs_imm - 1);
3317 __ Sub(out, out, temp);
3318 }
3319 }
3320
3321 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3322 DCHECK(instruction->IsDiv() || instruction->IsRem());
3323
3324 LocationSummary* locations = instruction->GetLocations();
3325 Location second = locations->InAt(1);
3326 DCHECK(second.IsConstant());
3327
3328 Register out = OutputRegister(instruction);
3329 Register dividend = InputRegisterAt(instruction, 0);
3330 int64_t imm = Int64FromConstant(second.GetConstant());
3331
3332 DataType::Type type = instruction->GetResultType();
3333 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3334
3335 int64_t magic;
3336 int shift;
3337 CalculateMagicAndShiftForDivRem(
3338 imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift);
3339
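// The sequence below performs division by multiplication with a precomputed "magic"
// constant (Granlund/Montgomery style):
//   temp = high_half(dividend * magic)        // Smulh, or Smull + Lsr for 32-bit
//   temp += dividend or temp -= dividend      // only when the signs of imm and magic differ
//   temp >>= shift                            // arithmetic shift
//   quotient = temp - (temp >> 63 or 31)      // adds 1 for a negative temp, rounding toward zero
// For Rem, the remainder is then recovered as dividend - quotient * imm via Msub.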
3340 UseScratchRegisterScope temps(GetVIXLAssembler());
3341 Register temp = temps.AcquireSameSizeAs(out);
3342
3343 // temp = get_high(dividend * magic)
3344 __ Mov(temp, magic);
3345 if (type == DataType::Type::kInt64) {
3346 __ Smulh(temp, dividend, temp);
3347 } else {
3348 __ Smull(temp.X(), dividend, temp);
3349 __ Lsr(temp.X(), temp.X(), 32);
3350 }
3351
3352 if (imm > 0 && magic < 0) {
3353 __ Add(temp, temp, dividend);
3354 } else if (imm < 0 && magic > 0) {
3355 __ Sub(temp, temp, dividend);
3356 }
3357
3358 if (shift != 0) {
3359 __ Asr(temp, temp, shift);
3360 }
3361
3362 if (instruction->IsDiv()) {
3363 __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3364 } else {
3365 __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3366 // TODO: Strength reduction for msub.
3367 Register temp_imm = temps.AcquireSameSizeAs(out);
3368 __ Mov(temp_imm, imm);
3369 __ Msub(out, temp, temp_imm, dividend);
3370 }
3371 }
3372
3373 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3374 DCHECK(instruction->IsDiv() || instruction->IsRem());
3375 DataType::Type type = instruction->GetResultType();
3376 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3377
3378 LocationSummary* locations = instruction->GetLocations();
3379 Register out = OutputRegister(instruction);
3380 Location second = locations->InAt(1);
3381
3382 if (second.IsConstant()) {
3383 int64_t imm = Int64FromConstant(second.GetConstant());
3384
3385 if (imm == 0) {
3386 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3387 } else if (imm == 1 || imm == -1) {
3388 DivRemOneOrMinusOne(instruction);
3389 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3390 DivRemByPowerOfTwo(instruction);
3391 } else {
3392 DCHECK(imm <= -2 || imm >= 2);
3393 GenerateDivRemWithAnyConstant(instruction);
3394 }
3395 } else {
3396 Register dividend = InputRegisterAt(instruction, 0);
3397 Register divisor = InputRegisterAt(instruction, 1);
3398 if (instruction->IsDiv()) {
3399 __ Sdiv(out, dividend, divisor);
3400 } else {
3401 UseScratchRegisterScope temps(GetVIXLAssembler());
3402 Register temp = temps.AcquireSameSizeAs(out);
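// rem = dividend - (dividend / divisor) * divisor, computed with Sdiv followed by Msub.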
3403 __ Sdiv(temp, dividend, divisor);
3404 __ Msub(out, temp, divisor, dividend);
3405 }
3406 }
3407 }
3408
3409 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3410 LocationSummary* locations =
3411 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3412 switch (div->GetResultType()) {
3413 case DataType::Type::kInt32:
3414 case DataType::Type::kInt64:
3415 locations->SetInAt(0, Location::RequiresRegister());
3416 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3417 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3418 break;
3419
3420 case DataType::Type::kFloat32:
3421 case DataType::Type::kFloat64:
3422 locations->SetInAt(0, Location::RequiresFpuRegister());
3423 locations->SetInAt(1, Location::RequiresFpuRegister());
3424 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3425 break;
3426
3427 default:
3428 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3429 }
3430 }
3431
3432 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3433 DataType::Type type = div->GetResultType();
3434 switch (type) {
3435 case DataType::Type::kInt32:
3436 case DataType::Type::kInt64:
3437 GenerateDivRemIntegral(div);
3438 break;
3439
3440 case DataType::Type::kFloat32:
3441 case DataType::Type::kFloat64:
3442 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3443 break;
3444
3445 default:
3446 LOG(FATAL) << "Unexpected div type " << type;
3447 }
3448 }
3449
3450 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3451 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3452 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3453 }
3454
3455 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3456 SlowPathCodeARM64* slow_path =
3457 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3458 codegen_->AddSlowPath(slow_path);
3459 Location value = instruction->GetLocations()->InAt(0);
3460
3461 DataType::Type type = instruction->GetType();
3462
3463 if (!DataType::IsIntegralType(type)) {
3464 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3465 return;
3466 }
3467
3468 if (value.IsConstant()) {
3469 int64_t divisor = Int64ConstantFrom(value);
3470 if (divisor == 0) {
3471 __ B(slow_path->GetEntryLabel());
3472 } else {
3473 // A division by a non-zero constant is valid. We don't need to perform
3474 // any check, so simply fall through.
3475 }
3476 } else {
3477 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3478 }
3479 }
3480
3481 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3482 LocationSummary* locations =
3483 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3484 locations->SetOut(Location::ConstantLocation(constant));
3485 }
3486
3487 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3488 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3489 // Will be generated at use site.
3490 }
3491
3492 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3493 exit->SetLocations(nullptr);
3494 }
3495
3496 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3497 }
3498
3499 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3500 LocationSummary* locations =
3501 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3502 locations->SetOut(Location::ConstantLocation(constant));
3503 }
3504
3505 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3506 // Will be generated at use site.
3507 }
3508
3509 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3510 if (successor->IsExitBlock()) {
3511 DCHECK(got->GetPrevious()->AlwaysThrows());
3512 return; // no code needed
3513 }
3514
3515 HBasicBlock* block = got->GetBlock();
3516 HInstruction* previous = got->GetPrevious();
3517 HLoopInformation* info = block->GetLoopInformation();
3518
3519 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3520 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
3521 UseScratchRegisterScope temps(GetVIXLAssembler());
3522 Register temp1 = temps.AcquireX();
3523 Register temp2 = temps.AcquireX();
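// Load the ArtMethod* stored at the bottom of the frame and bump its 16-bit
// hotness counter on this back edge.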
3524 __ Ldr(temp1, MemOperand(sp, 0));
3525 __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3526 __ Add(temp2, temp2, 1);
3527 __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3528 }
3529 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3530 return;
3531 }
3532 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3533 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3534 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
3535 }
3536 if (!codegen_->GoesToNextBlock(block, successor)) {
3537 __ B(codegen_->GetLabelOf(successor));
3538 }
3539 }
3540
3541 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3542 got->SetLocations(nullptr);
3543 }
3544
3545 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3546 HandleGoto(got, got->GetSuccessor());
3547 }
3548
3549 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3550 try_boundary->SetLocations(nullptr);
3551 }
3552
3553 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3554 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3555 if (!successor->IsExitBlock()) {
3556 HandleGoto(try_boundary, successor);
3557 }
3558 }
3559
3560 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3561 size_t condition_input_index,
3562 vixl::aarch64::Label* true_target,
3563 vixl::aarch64::Label* false_target) {
3564 HInstruction* cond = instruction->InputAt(condition_input_index);
3565
3566 if (true_target == nullptr && false_target == nullptr) {
3567 // Nothing to do. The code always falls through.
3568 return;
3569 } else if (cond->IsIntConstant()) {
3570 // Constant condition, statically compared against "true" (integer value 1).
3571 if (cond->AsIntConstant()->IsTrue()) {
3572 if (true_target != nullptr) {
3573 __ B(true_target);
3574 }
3575 } else {
3576 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3577 if (false_target != nullptr) {
3578 __ B(false_target);
3579 }
3580 }
3581 return;
3582 }
3583
3584 // The following code generates these patterns:
3585 // (1) true_target == nullptr && false_target != nullptr
3586 // - opposite condition true => branch to false_target
3587 // (2) true_target != nullptr && false_target == nullptr
3588 // - condition true => branch to true_target
3589 // (3) true_target != nullptr && false_target != nullptr
3590 // - condition true => branch to true_target
3591 // - branch to false_target
3592 if (IsBooleanValueOrMaterializedCondition(cond)) {
3593 // The condition instruction has been materialized, compare the output to 0.
3594 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3595 DCHECK(cond_val.IsRegister());
3596 if (true_target == nullptr) {
3597 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3598 } else {
3599 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3600 }
3601 } else {
3602 // The condition instruction has not been materialized, use its inputs as
3603 // the comparison and its condition as the branch condition.
3604 HCondition* condition = cond->AsCondition();
3605
3606 DataType::Type type = condition->InputAt(0)->GetType();
3607 if (DataType::IsFloatingPointType(type)) {
3608 GenerateFcmp(condition);
3609 if (true_target == nullptr) {
3610 IfCondition opposite_condition = condition->GetOppositeCondition();
3611 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3612 } else {
3613 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3614 }
3615 } else {
3616 // Integer cases.
3617 Register lhs = InputRegisterAt(condition, 0);
3618 Operand rhs = InputOperandAt(condition, 1);
3619
3620 Condition arm64_cond;
3621 vixl::aarch64::Label* non_fallthrough_target;
3622 if (true_target == nullptr) {
3623 arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3624 non_fallthrough_target = false_target;
3625 } else {
3626 arm64_cond = ARM64Condition(condition->GetCondition());
3627 non_fallthrough_target = true_target;
3628 }
3629
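// Comparisons against zero can be lowered to compare-and-branch (Cbz/Cbnz) or
// test-bit-and-branch (Tbz/Tbnz) instructions instead of a Cmp plus a conditional branch.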
3630 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3631 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3632 switch (arm64_cond) {
3633 case eq:
3634 __ Cbz(lhs, non_fallthrough_target);
3635 break;
3636 case ne:
3637 __ Cbnz(lhs, non_fallthrough_target);
3638 break;
3639 case lt:
3640 // Test the sign bit and branch accordingly.
3641 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3642 break;
3643 case ge:
3644 // Test the sign bit and branch accordingly.
3645 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3646 break;
3647 default:
3648 // Without the `static_cast` the compiler emits an error under
3649 // `-Werror=sign-promo`.
3650 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3651 }
3652 } else {
3653 __ Cmp(lhs, rhs);
3654 __ B(arm64_cond, non_fallthrough_target);
3655 }
3656 }
3657 }
3658
3659 // If neither branch falls through (case 3), the conditional branch to `true_target`
3660 // was already emitted (case 2) and we need to emit a jump to `false_target`.
3661 if (true_target != nullptr && false_target != nullptr) {
3662 __ B(false_target);
3663 }
3664 }
3665
3666 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3667 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3668 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3669 locations->SetInAt(0, Location::RequiresRegister());
3670 }
3671 }
3672
3673 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3674 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3675 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3676 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3677 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3678 true_target = nullptr;
3679 }
3680 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3681 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3682 false_target = nullptr;
3683 }
3684 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
3685 }
3686
3687 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3688 LocationSummary* locations = new (GetGraph()->GetAllocator())
3689 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3690 InvokeRuntimeCallingConvention calling_convention;
3691 RegisterSet caller_saves = RegisterSet::Empty();
3692 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3693 locations->SetCustomSlowPathCallerSaves(caller_saves);
3694 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3695 locations->SetInAt(0, Location::RequiresRegister());
3696 }
3697 }
3698
3699 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3700 SlowPathCodeARM64* slow_path =
3701 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3702 GenerateTestAndBranch(deoptimize,
3703 /* condition_input_index */ 0,
3704 slow_path->GetEntryLabel(),
3705 /* false_target */ nullptr);
3706 }
3707
3708 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3709 LocationSummary* locations = new (GetGraph()->GetAllocator())
3710 LocationSummary(flag, LocationSummary::kNoCall);
3711 locations->SetOut(Location::RequiresRegister());
3712 }
3713
3714 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3715 __ Ldr(OutputRegister(flag),
3716 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3717 }
3718
3719 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3720 return condition->IsCondition() &&
3721 DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3722 }
3723
3724 static inline Condition GetConditionForSelect(HCondition* condition) {
3725 IfCondition cond = condition->AsCondition()->GetCondition();
3726 return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3727 : ARM64Condition(cond);
3728 }
3729
3730 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3731 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3732 if (DataType::IsFloatingPointType(select->GetType())) {
3733 locations->SetInAt(0, Location::RequiresFpuRegister());
3734 locations->SetInAt(1, Location::RequiresFpuRegister());
3735 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3736 } else {
3737 HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3738 HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3739 bool is_true_value_constant = cst_true_value != nullptr;
3740 bool is_false_value_constant = cst_false_value != nullptr;
3741 // Ask VIXL whether we should synthesize constants in registers.
3742 // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3743 Operand true_op = is_true_value_constant ?
3744 Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3745 Operand false_op = is_false_value_constant ?
3746 Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3747 bool true_value_in_register = false;
3748 bool false_value_in_register = false;
3749 MacroAssembler::GetCselSynthesisInformation(
3750 x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3751 true_value_in_register |= !is_true_value_constant;
3752 false_value_in_register |= !is_false_value_constant;
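// If VIXL would have to synthesize the constant into a register to emit the Csel
// anyway, request a register from the register allocator up front; otherwise keep
// the constant operand so it can be encoded directly.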
3753
3754 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3755 : Location::ConstantLocation(cst_true_value));
3756 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3757 : Location::ConstantLocation(cst_false_value));
3758 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3759 }
3760
3761 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3762 locations->SetInAt(2, Location::RequiresRegister());
3763 }
3764 }
3765
3766 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3767 HInstruction* cond = select->GetCondition();
3768 Condition csel_cond;
3769
3770 if (IsBooleanValueOrMaterializedCondition(cond)) {
3771 if (cond->IsCondition() && cond->GetNext() == select) {
3772 // Use the condition flags set by the previous instruction.
3773 csel_cond = GetConditionForSelect(cond->AsCondition());
3774 } else {
3775 __ Cmp(InputRegisterAt(select, 2), 0);
3776 csel_cond = ne;
3777 }
3778 } else if (IsConditionOnFloatingPointValues(cond)) {
3779 GenerateFcmp(cond);
3780 csel_cond = GetConditionForSelect(cond->AsCondition());
3781 } else {
3782 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3783 csel_cond = GetConditionForSelect(cond->AsCondition());
3784 }
3785
3786 if (DataType::IsFloatingPointType(select->GetType())) {
3787 __ Fcsel(OutputFPRegister(select),
3788 InputFPRegisterAt(select, 1),
3789 InputFPRegisterAt(select, 0),
3790 csel_cond);
3791 } else {
3792 __ Csel(OutputRegister(select),
3793 InputOperandAt(select, 1),
3794 InputOperandAt(select, 0),
3795 csel_cond);
3796 }
3797 }
3798
3799 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3800 new (GetGraph()->GetAllocator()) LocationSummary(info);
3801 }
3802
3803 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3804 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3805 }
3806
3807 void CodeGeneratorARM64::GenerateNop() {
3808 __ Nop();
3809 }
3810
3811 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3812 HandleFieldGet(instruction, instruction->GetFieldInfo());
3813 }
3814
3815 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3816 HandleFieldGet(instruction, instruction->GetFieldInfo());
3817 }
3818
3819 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3820 HandleFieldSet(instruction);
3821 }
3822
3823 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3824 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3825 }
3826
3827 // Temp is used for read barrier.
3828 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3829 if (kEmitCompilerReadBarrier &&
3830 (kUseBakerReadBarrier ||
3831 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3832 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3833 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3834 return 1;
3835 }
3836 return 0;
3837 }
3838
3839 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3840 // interface pointer, one for loading the current interface.
3841 // The other checks have one temp for loading the object's class.
3842 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3843 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3844 return 3;
3845 }
3846 return 1 + NumberOfInstanceOfTemps(type_check_kind);
3847 }
3848
3849 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3850 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3851 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3852 bool baker_read_barrier_slow_path = false;
3853 switch (type_check_kind) {
3854 case TypeCheckKind::kExactCheck:
3855 case TypeCheckKind::kAbstractClassCheck:
3856 case TypeCheckKind::kClassHierarchyCheck:
3857 case TypeCheckKind::kArrayObjectCheck: {
3858 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3859 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3860 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3861 break;
3862 }
3863 case TypeCheckKind::kArrayCheck:
3864 case TypeCheckKind::kUnresolvedCheck:
3865 case TypeCheckKind::kInterfaceCheck:
3866 call_kind = LocationSummary::kCallOnSlowPath;
3867 break;
3868 }
3869
3870 LocationSummary* locations =
3871 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3872 if (baker_read_barrier_slow_path) {
3873 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3874 }
3875 locations->SetInAt(0, Location::RequiresRegister());
3876 locations->SetInAt(1, Location::RequiresRegister());
3877 // The "out" register is used as a temporary, so it overlaps with the inputs.
3878 // Note that TypeCheckSlowPathARM64 uses this register too.
3879 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880 // Add temps if necessary for read barriers.
3881 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882 }
3883
3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886 LocationSummary* locations = instruction->GetLocations();
3887 Location obj_loc = locations->InAt(0);
3888 Register obj = InputRegisterAt(instruction, 0);
3889 Register cls = InputRegisterAt(instruction, 1);
3890 Location out_loc = locations->Out();
3891 Register out = OutputRegister(instruction);
3892 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3893 DCHECK_LE(num_temps, 1u);
3894 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3895 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3896 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3897 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3898 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3899
3900 vixl::aarch64::Label done, zero;
3901 SlowPathCodeARM64* slow_path = nullptr;
3902
3903 // Return 0 if `obj` is null.
3904 // Avoid null check if we know `obj` is not null.
3905 if (instruction->MustDoNullCheck()) {
3906 __ Cbz(obj, &zero);
3907 }
3908
3909 switch (type_check_kind) {
3910 case TypeCheckKind::kExactCheck: {
3911 ReadBarrierOption read_barrier_option =
3912 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3913 // /* HeapReference<Class> */ out = obj->klass_
3914 GenerateReferenceLoadTwoRegisters(instruction,
3915 out_loc,
3916 obj_loc,
3917 class_offset,
3918 maybe_temp_loc,
3919 read_barrier_option);
3920 __ Cmp(out, cls);
3921 __ Cset(out, eq);
3922 if (zero.IsLinked()) {
3923 __ B(&done);
3924 }
3925 break;
3926 }
3927
3928 case TypeCheckKind::kAbstractClassCheck: {
3929 ReadBarrierOption read_barrier_option =
3930 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3931 // /* HeapReference<Class> */ out = obj->klass_
3932 GenerateReferenceLoadTwoRegisters(instruction,
3933 out_loc,
3934 obj_loc,
3935 class_offset,
3936 maybe_temp_loc,
3937 read_barrier_option);
3938 // If the class is abstract, we eagerly fetch the super class of the
3939 // object to avoid doing a comparison we know will fail.
3940 vixl::aarch64::Label loop, success;
3941 __ Bind(&loop);
3942 // /* HeapReference<Class> */ out = out->super_class_
3943 GenerateReferenceLoadOneRegister(instruction,
3944 out_loc,
3945 super_offset,
3946 maybe_temp_loc,
3947 read_barrier_option);
3948 // If `out` is null, we use it for the result, and jump to `done`.
3949 __ Cbz(out, &done);
3950 __ Cmp(out, cls);
3951 __ B(ne, &loop);
3952 __ Mov(out, 1);
3953 if (zero.IsLinked()) {
3954 __ B(&done);
3955 }
3956 break;
3957 }
3958
3959 case TypeCheckKind::kClassHierarchyCheck: {
3960 ReadBarrierOption read_barrier_option =
3961 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3962 // /* HeapReference<Class> */ out = obj->klass_
3963 GenerateReferenceLoadTwoRegisters(instruction,
3964 out_loc,
3965 obj_loc,
3966 class_offset,
3967 maybe_temp_loc,
3968 read_barrier_option);
3969 // Walk over the class hierarchy to find a match.
3970 vixl::aarch64::Label loop, success;
3971 __ Bind(&loop);
3972 __ Cmp(out, cls);
3973 __ B(eq, &success);
3974 // /* HeapReference<Class> */ out = out->super_class_
3975 GenerateReferenceLoadOneRegister(instruction,
3976 out_loc,
3977 super_offset,
3978 maybe_temp_loc,
3979 read_barrier_option);
3980 __ Cbnz(out, &loop);
3981 // If `out` is null, we use it for the result, and jump to `done`.
3982 __ B(&done);
3983 __ Bind(&success);
3984 __ Mov(out, 1);
3985 if (zero.IsLinked()) {
3986 __ B(&done);
3987 }
3988 break;
3989 }
3990
3991 case TypeCheckKind::kArrayObjectCheck: {
3992 ReadBarrierOption read_barrier_option =
3993 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3994 // /* HeapReference<Class> */ out = obj->klass_
3995 GenerateReferenceLoadTwoRegisters(instruction,
3996 out_loc,
3997 obj_loc,
3998 class_offset,
3999 maybe_temp_loc,
4000 read_barrier_option);
4001 // Do an exact check.
4002 vixl::aarch64::Label exact_check;
4003 __ Cmp(out, cls);
4004 __ B(eq, &exact_check);
4005 // Otherwise, we need to check that the object's class is a non-primitive array.
4006 // /* HeapReference<Class> */ out = out->component_type_
4007 GenerateReferenceLoadOneRegister(instruction,
4008 out_loc,
4009 component_offset,
4010 maybe_temp_loc,
4011 read_barrier_option);
4012 // If `out` is null, we use it for the result, and jump to `done`.
4013 __ Cbz(out, &done);
4014 __ Ldrh(out, HeapOperand(out, primitive_offset));
4015 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4016 __ Cbnz(out, &zero);
4017 __ Bind(&exact_check);
4018 __ Mov(out, 1);
4019 __ B(&done);
4020 break;
4021 }
4022
4023 case TypeCheckKind::kArrayCheck: {
4024 // No read barrier since the slow path will retry upon failure.
4025 // /* HeapReference<Class> */ out = obj->klass_
4026 GenerateReferenceLoadTwoRegisters(instruction,
4027 out_loc,
4028 obj_loc,
4029 class_offset,
4030 maybe_temp_loc,
4031 kWithoutReadBarrier);
4032 __ Cmp(out, cls);
4033 DCHECK(locations->OnlyCallsOnSlowPath());
4034 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4035 instruction, /* is_fatal */ false);
4036 codegen_->AddSlowPath(slow_path);
4037 __ B(ne, slow_path->GetEntryLabel());
4038 __ Mov(out, 1);
4039 if (zero.IsLinked()) {
4040 __ B(&done);
4041 }
4042 break;
4043 }
4044
4045 case TypeCheckKind::kUnresolvedCheck:
4046 case TypeCheckKind::kInterfaceCheck: {
4047 // Note that we indeed only call on slow path, but we always go
4048 // into the slow path for the unresolved and interface check
4049 // cases.
4050 //
4051 // We cannot directly call the InstanceofNonTrivial runtime
4052 // entry point without resorting to a type checking slow path
4053 // here (i.e. by calling InvokeRuntime directly), as it would
4054 // require assigning fixed registers for the inputs of this
4055 // HInstanceOf instruction (following the runtime calling
4056 // convention), which might be cluttered by the potential first
4057 // read barrier emission at the beginning of this method.
4058 //
4059 // TODO: Introduce a new runtime entry point taking the object
4060 // to test (instead of its class) as argument, and let it deal
4061 // with the read barrier issues. This will let us refactor this
4062 // case of the `switch` code as it was previously (with a direct
4063 // call to the runtime not using a type checking slow path).
4064 // This should also be beneficial for the other cases above.
4065 DCHECK(locations->OnlyCallsOnSlowPath());
4066 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4067 instruction, /* is_fatal */ false);
4068 codegen_->AddSlowPath(slow_path);
4069 __ B(slow_path->GetEntryLabel());
4070 if (zero.IsLinked()) {
4071 __ B(&done);
4072 }
4073 break;
4074 }
4075 }
4076
4077 if (zero.IsLinked()) {
4078 __ Bind(&zero);
4079 __ Mov(out, 0);
4080 }
4081
4082 if (done.IsLinked()) {
4083 __ Bind(&done);
4084 }
4085
4086 if (slow_path != nullptr) {
4087 __ Bind(slow_path->GetExitLabel());
4088 }
4089 }
4090
4091 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4092 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4093 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
4094 LocationSummary* locations =
4095 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4096 locations->SetInAt(0, Location::RequiresRegister());
4097 locations->SetInAt(1, Location::RequiresRegister());
4098 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4099 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4100 }
4101
4102 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4103 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4104 LocationSummary* locations = instruction->GetLocations();
4105 Location obj_loc = locations->InAt(0);
4106 Register obj = InputRegisterAt(instruction, 0);
4107 Register cls = InputRegisterAt(instruction, 1);
4108 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4109 DCHECK_GE(num_temps, 1u);
4110 DCHECK_LE(num_temps, 3u);
4111 Location temp_loc = locations->GetTemp(0);
4112 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4113 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4114 Register temp = WRegisterFrom(temp_loc);
4115 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4116 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4117 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4118 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4119 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4120 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4121 const uint32_t object_array_data_offset =
4122 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4123
4124 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
4125 SlowPathCodeARM64* type_check_slow_path =
4126 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4127 instruction, is_type_check_slow_path_fatal);
4128 codegen_->AddSlowPath(type_check_slow_path);
4129
4130 vixl::aarch64::Label done;
4131 // Avoid null check if we know obj is not null.
4132 if (instruction->MustDoNullCheck()) {
4133 __ Cbz(obj, &done);
4134 }
4135
4136 switch (type_check_kind) {
4137 case TypeCheckKind::kExactCheck:
4138 case TypeCheckKind::kArrayCheck: {
4139 // /* HeapReference<Class> */ temp = obj->klass_
4140 GenerateReferenceLoadTwoRegisters(instruction,
4141 temp_loc,
4142 obj_loc,
4143 class_offset,
4144 maybe_temp2_loc,
4145 kWithoutReadBarrier);
4146
4147 __ Cmp(temp, cls);
4148 // Jump to slow path for throwing the exception or doing a
4149 // more involved array check.
4150 __ B(ne, type_check_slow_path->GetEntryLabel());
4151 break;
4152 }
4153
4154 case TypeCheckKind::kAbstractClassCheck: {
4155 // /* HeapReference<Class> */ temp = obj->klass_
4156 GenerateReferenceLoadTwoRegisters(instruction,
4157 temp_loc,
4158 obj_loc,
4159 class_offset,
4160 maybe_temp2_loc,
4161 kWithoutReadBarrier);
4162
4163 // If the class is abstract, we eagerly fetch the super class of the
4164 // object to avoid doing a comparison we know will fail.
4165 vixl::aarch64::Label loop;
4166 __ Bind(&loop);
4167 // /* HeapReference<Class> */ temp = temp->super_class_
4168 GenerateReferenceLoadOneRegister(instruction,
4169 temp_loc,
4170 super_offset,
4171 maybe_temp2_loc,
4172 kWithoutReadBarrier);
4173
4174 // If the class reference currently in `temp` is null, jump to the slow path to throw the
4175 // exception.
4176 __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4177 // Otherwise, compare classes.
4178 __ Cmp(temp, cls);
4179 __ B(ne, &loop);
4180 break;
4181 }
4182
4183 case TypeCheckKind::kClassHierarchyCheck: {
4184 // /* HeapReference<Class> */ temp = obj->klass_
4185 GenerateReferenceLoadTwoRegisters(instruction,
4186 temp_loc,
4187 obj_loc,
4188 class_offset,
4189 maybe_temp2_loc,
4190 kWithoutReadBarrier);
4191
4192 // Walk over the class hierarchy to find a match.
4193 vixl::aarch64::Label loop;
4194 __ Bind(&loop);
4195 __ Cmp(temp, cls);
4196 __ B(eq, &done);
4197
4198 // /* HeapReference<Class> */ temp = temp->super_class_
4199 GenerateReferenceLoadOneRegister(instruction,
4200 temp_loc,
4201 super_offset,
4202 maybe_temp2_loc,
4203 kWithoutReadBarrier);
4204
4205 // If the class reference currently in `temp` is not null, jump
4206 // back to the beginning of the loop.
4207 __ Cbnz(temp, &loop);
4208 // Otherwise, jump to the slow path to throw the exception.
4209 __ B(type_check_slow_path->GetEntryLabel());
4210 break;
4211 }
4212
4213 case TypeCheckKind::kArrayObjectCheck: {
4214 // /* HeapReference<Class> */ temp = obj->klass_
4215 GenerateReferenceLoadTwoRegisters(instruction,
4216 temp_loc,
4217 obj_loc,
4218 class_offset,
4219 maybe_temp2_loc,
4220 kWithoutReadBarrier);
4221
4222 // Do an exact check.
4223 __ Cmp(temp, cls);
4224 __ B(eq, &done);
4225
4226 // Otherwise, we need to check that the object's class is a non-primitive array.
4227 // /* HeapReference<Class> */ temp = temp->component_type_
4228 GenerateReferenceLoadOneRegister(instruction,
4229 temp_loc,
4230 component_offset,
4231 maybe_temp2_loc,
4232 kWithoutReadBarrier);
4233
4234 // If the component type is null, jump to the slow path to throw the exception.
4235 __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4236 // Otherwise, the object is indeed an array. Further check that this component type is not a
4237 // primitive type.
4238 __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4239 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4240 __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4241 break;
4242 }
4243
4244 case TypeCheckKind::kUnresolvedCheck:
4245 // We always go into the type check slow path for the unresolved check cases.
4246 //
4247 // We cannot directly call the CheckCast runtime entry point
4248 // without resorting to a type checking slow path here (i.e. by
4249 // calling InvokeRuntime directly), as it would require assigning
4250 // fixed registers for the inputs of this HCheckCast
4251 // instruction (following the runtime calling convention), which
4252 // might be cluttered by the potential first read barrier
4253 // emission at the beginning of this method.
4254 __ B(type_check_slow_path->GetEntryLabel());
4255 break;
4256 case TypeCheckKind::kInterfaceCheck: {
4257 // /* HeapReference<Class> */ temp = obj->klass_
4258 GenerateReferenceLoadTwoRegisters(instruction,
4259 temp_loc,
4260 obj_loc,
4261 class_offset,
4262 maybe_temp2_loc,
4263 kWithoutReadBarrier);
4264
4265 // /* HeapReference<Class> */ temp = temp->iftable_
4266 GenerateReferenceLoadTwoRegisters(instruction,
4267 temp_loc,
4268 temp_loc,
4269 iftable_offset,
4270 maybe_temp2_loc,
4271 kWithoutReadBarrier);
4272 // Iftable is never null.
4273 __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4274 // Loop through the iftable and check if any class matches.
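// Each iftable entry is a pair of references (the interface class and its method
// array), so the length loaded above counts two slots per interface and the loop
// below advances by two references per iteration.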
4275 vixl::aarch64::Label start_loop;
4276 __ Bind(&start_loop);
4277 __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4278 __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4279 GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4280 // Go to next interface.
4281 __ Add(temp, temp, 2 * kHeapReferenceSize);
4282 __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4283 // Compare the classes and continue the loop if they do not match.
4284 __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4285 __ B(ne, &start_loop);
4286 break;
4287 }
4288 }
4289 __ Bind(&done);
4290
4291 __ Bind(type_check_slow_path->GetExitLabel());
4292 }
4293
4294 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4295 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4296 locations->SetOut(Location::ConstantLocation(constant));
4297 }
4298
4299 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4300 // Will be generated at use site.
4301 }
4302
4303 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4304 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4305 locations->SetOut(Location::ConstantLocation(constant));
4306 }
4307
4308 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4309 // Will be generated at use site.
4310 }
4311
4312 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4313 // The trampoline uses the same calling convention as dex calling conventions,
4314 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4315 // the method_idx.
4316 HandleInvoke(invoke);
4317 }
4318
4319 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4320 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4321 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4322 }
4323
4324 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4325 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4326 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4327 }
4328
4329 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4330 HandleInvoke(invoke);
4331 }
4332
4333 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4334 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4335 LocationSummary* locations = invoke->GetLocations();
4336 Register temp = XRegisterFrom(locations->GetTemp(0));
4337 Location receiver = locations->InAt(0);
4338 Offset class_offset = mirror::Object::ClassOffset();
4339 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4340
4341 // The register ip1 is required to be used for the hidden argument in
4342 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4343 MacroAssembler* masm = GetVIXLAssembler();
4344 UseScratchRegisterScope scratch_scope(masm);
4345 scratch_scope.Exclude(ip1);
4346 __ Mov(ip1, invoke->GetDexMethodIndex());
4347
4348 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4349 if (receiver.IsStackSlot()) {
4350 __ Ldr(temp.W(), StackOperandFrom(receiver));
4351 {
4352 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4353 // /* HeapReference<Class> */ temp = temp->klass_
4354 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4355 codegen_->MaybeRecordImplicitNullCheck(invoke);
4356 }
4357 } else {
4358 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4359 // /* HeapReference<Class> */ temp = receiver->klass_
4360 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4361 codegen_->MaybeRecordImplicitNullCheck(invoke);
4362 }
4363
4364 // Instead of simply (possibly) unpoisoning `temp` here, we should
4365 // emit a read barrier for the previous class reference load.
4366 // However this is not required in practice, as this is an
4367 // intermediate/temporary reference and because the current
4368 // concurrent copying collector keeps the from-space memory
4369 // intact/accessible until the end of the marking phase (the
4370 // concurrent copying collector may not do so in the future).
4371 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4372 __ Ldr(temp,
4373 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4374 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4375 invoke->GetImtIndex(), kArm64PointerSize));
4376 // temp = temp->GetImtEntryAt(method_offset);
4377 __ Ldr(temp, MemOperand(temp, method_offset));
4378 // lr = temp->GetEntryPoint();
4379 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4380
4381 {
4382 // Ensure the pc position is recorded immediately after the `blr` instruction.
4383 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4384
4385 // lr();
4386 __ blr(lr);
4387 DCHECK(!codegen_->IsLeafMethod());
4388 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4389 }
4390
4391 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4392 }
4393
4394 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4395 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4396 if (intrinsic.TryDispatch(invoke)) {
4397 return;
4398 }
4399
4400 HandleInvoke(invoke);
4401 }
4402
4403 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4404 // Explicit clinit checks triggered by static invokes must have been pruned by
4405 // art::PrepareForRegisterAllocation.
4406 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4407
4408 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4409 if (intrinsic.TryDispatch(invoke)) {
4410 return;
4411 }
4412
4413 HandleInvoke(invoke);
4414 }
4415
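// Emits the intrinsic implementation for `invoke` if the intrinsics builder set up
// intrinsified locations for it; returns whether intrinsic code was generated.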
4416 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4417 if (invoke->GetLocations()->Intrinsified()) {
4418 IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4419 intrinsic.Dispatch(invoke);
4420 return true;
4421 }
4422 return false;
4423 }
4424
4425 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4426 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4427 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4428 // On ARM64 we support all dispatch types.
4429 return desired_dispatch_info;
4430 }
4431
4432 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4433 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4434 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4435 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
4436 switch (invoke->GetMethodLoadKind()) {
4437 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4438 uint32_t offset =
4439 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4440 // temp = thread->string_init_entrypoint
4441 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4442 break;
4443 }
4444 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4445 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4446 break;
4447 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4448 DCHECK(GetCompilerOptions().IsBootImage());
4449 // Add ADRP with its PC-relative method patch.
4450 vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
4451 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4452 // Add ADD with its PC-relative method patch.
4453 vixl::aarch64::Label* add_label =
4454 NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
4455 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4456 break;
4457 }
4458 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4459 // Load method address from literal pool.
4460 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4461 break;
4462 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4463 // Add ADRP with its PC-relative DexCache access patch.
4464 MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4465 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
4466 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4467 // Add LDR with its PC-relative DexCache access patch.
4468 vixl::aarch64::Label* ldr_label =
4469 NewMethodBssEntryPatch(target_method, adrp_label);
4470 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4471 break;
4472 }
4473 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4474 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4475 return; // No code pointer retrieval; the runtime performs the call directly.
4476 }
4477 }
4478
4479 switch (invoke->GetCodePtrLocation()) {
4480 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4481 {
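// Recursive call: branch directly to this method's own frame entry label.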
4482 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4483 ExactAssemblyScope eas(GetVIXLAssembler(),
4484 kInstructionSize,
4485 CodeBufferCheckScope::kExactSize);
4486 __ bl(&frame_entry_label_);
4487 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4488 }
4489 break;
4490 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4491 // LR = callee_method->entry_point_from_quick_compiled_code_;
4492 __ Ldr(lr, MemOperand(
4493 XRegisterFrom(callee_method),
4494 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4495 {
4496 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4497 ExactAssemblyScope eas(GetVIXLAssembler(),
4498 kInstructionSize,
4499 CodeBufferCheckScope::kExactSize);
4500 // lr()
4501 __ blr(lr);
4502 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4503 }
4504 break;
4505 }
4506
4507 DCHECK(!IsLeafMethod());
4508 }
4509
4510 void CodeGeneratorARM64::GenerateVirtualCall(
4511 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4512 // Use the calling convention instead of the location of the receiver, as
4513 // intrinsics may have put the receiver in a different register. In the intrinsics
4514 // slow path, the arguments have been moved to the right place, so here we are
4515 // guaranteed that the receiver is the first register of the calling convention.
4516 InvokeDexCallingConvention calling_convention;
4517 Register receiver = calling_convention.GetRegisterAt(0);
4518 Register temp = XRegisterFrom(temp_in);
4519 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4520 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4521 Offset class_offset = mirror::Object::ClassOffset();
4522 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4523
4524 DCHECK(receiver.IsRegister());
4525
4526 {
4527 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4528 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4529 // /* HeapReference<Class> */ temp = receiver->klass_
4530 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4531 MaybeRecordImplicitNullCheck(invoke);
4532 }
4533 // Instead of simply (possibly) unpoisoning `temp` here, we should emit a
4534 // read barrier for the previous class reference load. However this is not
4535 // required in practice, as this is an intermediate/temporary reference and
4536 // because the current concurrent copying collector keeps the from-space
4537 // memory intact/accessible until the end of the marking phase (the
4538 // concurrent copying collector may not do so in the future).
4539 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4540 // temp = temp->GetMethodAt(method_offset);
4541 __ Ldr(temp, MemOperand(temp, method_offset));
4542 // lr = temp->GetEntryPoint();
4543 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4544 {
4545 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4546 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4547 // lr();
4548 __ blr(lr);
4549 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4550 }
4551 }
4552
4553 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4554 HandleInvoke(invoke);
4555 }
4556
4557 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4558 codegen_->GenerateInvokePolymorphicCall(invoke);
4559 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4560 }
4561
4562 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4563 MethodReference target_method,
4564 vixl::aarch64::Label* adrp_label) {
4565 return NewPcRelativePatch(
4566 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4567 }
4568
4569 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4570 MethodReference target_method,
4571 vixl::aarch64::Label* adrp_label) {
4572 return NewPcRelativePatch(
4573 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4574 }
4575
4576 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4577 const DexFile& dex_file,
4578 dex::TypeIndex type_index,
4579 vixl::aarch64::Label* adrp_label) {
4580 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4581 }
4582
4583 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4584 const DexFile& dex_file,
4585 dex::TypeIndex type_index,
4586 vixl::aarch64::Label* adrp_label) {
4587 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4588 }
4589
4590 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4591 const DexFile& dex_file,
4592 dex::StringIndex string_index,
4593 vixl::aarch64::Label* adrp_label) {
4594 return NewPcRelativePatch(
4595 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4596 }
4597
4598 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4599 const DexFile& dex_file,
4600 dex::StringIndex string_index,
4601 vixl::aarch64::Label* adrp_label) {
4602 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4603 }
4604
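// Records a patch site for a Baker read barrier branch; `custom_data` encodes the
// kind of barrier thunk (field, array or GC root load) and the registers involved.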
4605 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4606 baker_read_barrier_patches_.emplace_back(custom_data);
4607 return &baker_read_barrier_patches_.back().label;
4608 }
4609
4610 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4611 const DexFile* dex_file,
4612 uint32_t offset_or_index,
4613 vixl::aarch64::Label* adrp_label,
4614 ArenaDeque<PcRelativePatchInfo>* patches) {
4615 // Add a patch entry and return the label.
4616 patches->emplace_back(dex_file, offset_or_index);
4617 PcRelativePatchInfo* info = &patches->back();
4618 vixl::aarch64::Label* label = &info->label;
4619 // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4620 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4621 return label;
4622 }
4623
4624 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4625 uint64_t address) {
4626 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4627 }
4628
4629 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4630 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4631 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4632 return jit_string_patches_.GetOrCreate(
4633 StringReference(&dex_file, string_index),
4634 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4635 }
4636
4637 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4638 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4639 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4640 return jit_class_patches_.GetOrCreate(
4641 TypeReference(&dex_file, type_index),
4642 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4643 }
4644
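// Binds `fixup_label` and emits an ADRP with a zero immediate; the linker later
// patches it, together with a following ADD or LDR placeholder, to form the
// PC-relative address of the target.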
4645 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4646 vixl::aarch64::Register reg) {
4647 DCHECK(reg.IsX());
4648 SingleEmissionCheckScope guard(GetVIXLAssembler());
4649 __ Bind(fixup_label);
4650 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4651 }
4652
4653 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4654 vixl::aarch64::Register out,
4655 vixl::aarch64::Register base) {
4656 DCHECK(out.IsX());
4657 DCHECK(base.IsX());
4658 SingleEmissionCheckScope guard(GetVIXLAssembler());
4659 __ Bind(fixup_label);
4660 __ add(out, base, Operand(/* offset placeholder */ 0));
4661 }
4662
4663 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4664 vixl::aarch64::Register out,
4665 vixl::aarch64::Register base) {
4666 DCHECK(base.IsX());
4667 SingleEmissionCheckScope guard(GetVIXLAssembler());
4668 __ Bind(fixup_label);
4669 __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4670 }
4671
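// Converts the recorded PC-relative patch infos into linker patches using the
// given LinkerPatch factory.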
4672 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4673 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4674 const ArenaDeque<PcRelativePatchInfo>& infos,
4675 ArenaVector<linker::LinkerPatch>* linker_patches) {
4676 for (const PcRelativePatchInfo& info : infos) {
4677 linker_patches->push_back(Factory(info.label.GetLocation(),
4678 info.target_dex_file,
4679 info.pc_insn_label->GetLocation(),
4680 info.offset_or_index));
4681 }
4682 }
4683
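// Collects all recorded patch sites into `linker_patches` for the linker to resolve.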
4684 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4685 DCHECK(linker_patches->empty());
4686 size_t size =
4687 boot_image_method_patches_.size() +
4688 method_bss_entry_patches_.size() +
4689 boot_image_type_patches_.size() +
4690 type_bss_entry_patches_.size() +
4691 boot_image_string_patches_.size() +
4692 string_bss_entry_patches_.size() +
4693 baker_read_barrier_patches_.size();
4694 linker_patches->reserve(size);
4695 if (GetCompilerOptions().IsBootImage()) {
4696 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4697 boot_image_method_patches_, linker_patches);
4698 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4699 boot_image_type_patches_, linker_patches);
4700 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4701 boot_image_string_patches_, linker_patches);
4702 } else {
4703 DCHECK(boot_image_method_patches_.empty());
4704 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
4705 boot_image_type_patches_, linker_patches);
4706 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
4707 boot_image_string_patches_, linker_patches);
4708 }
4709 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4710 method_bss_entry_patches_, linker_patches);
4711 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4712 type_bss_entry_patches_, linker_patches);
4713 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4714 string_bss_entry_patches_, linker_patches);
4715 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4716 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
4717 info.label.GetLocation(), info.custom_data));
4718 }
4719 DCHECK_EQ(size, linker_patches->size());
4720 }
4721
4722 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
4723 return uint32_literals_.GetOrCreate(
4724 value,
4725 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4726 }
4727
4728 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4729 return uint64_literals_.GetOrCreate(
4730 value,
4731 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4732 }
4733
4734 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4735 // Explicit clinit checks triggered by static invokes must have been pruned by
4736 // art::PrepareForRegisterAllocation.
4737 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4738
4739 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4740 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4741 return;
4742 }
4743
4744 {
4745 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4746 // are no pools emitted.
4747 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4748 LocationSummary* locations = invoke->GetLocations();
4749 codegen_->GenerateStaticOrDirectCall(
4750 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4751 }
4752
4753 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4754 }
4755
4756 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4757 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4758 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4759 return;
4760 }
4761
4762 {
4763 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4764 // are no pools emitted.
4765 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4766 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4767 DCHECK(!codegen_->IsLeafMethod());
4768 }
4769
4770 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4771 }
4772
4773 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4774 HLoadClass::LoadKind desired_class_load_kind) {
4775 switch (desired_class_load_kind) {
4776 case HLoadClass::LoadKind::kInvalid:
4777 LOG(FATAL) << "UNREACHABLE";
4778 UNREACHABLE();
4779 case HLoadClass::LoadKind::kReferrersClass:
4780 break;
4781 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4782 case HLoadClass::LoadKind::kBootImageClassTable:
4783 case HLoadClass::LoadKind::kBssEntry:
4784 DCHECK(!Runtime::Current()->UseJitCompilation());
4785 break;
4786 case HLoadClass::LoadKind::kJitTableAddress:
4787 DCHECK(Runtime::Current()->UseJitCompilation());
4788 break;
4789 case HLoadClass::LoadKind::kBootImageAddress:
4790 case HLoadClass::LoadKind::kRuntimeCall:
4791 break;
4792 }
4793 return desired_class_load_kind;
4794 }
4795
4796 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4797 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4798 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4799 InvokeRuntimeCallingConvention calling_convention;
4800 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4801 cls,
4802 LocationFrom(calling_convention.GetRegisterAt(0)),
4803 LocationFrom(vixl::aarch64::x0));
4804 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4805 return;
4806 }
4807 DCHECK(!cls->NeedsAccessCheck());
4808
4809 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4810 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4811 ? LocationSummary::kCallOnSlowPath
4812 : LocationSummary::kNoCall;
4813 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
4814 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4815 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4816 }
4817
4818 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4819 locations->SetInAt(0, Location::RequiresRegister());
4820 }
4821 locations->SetOut(Location::RequiresRegister());
4822 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4823 if (!kUseReadBarrier || kUseBakerReadBarrier) {
4824 // Rely on the type resolution or initialization and marking to save everything we need.
4825 RegisterSet caller_saves = RegisterSet::Empty();
4826 InvokeRuntimeCallingConvention calling_convention;
4827 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4828 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
4829 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
4830 DataType::Type::kReference).GetCode());
4831 locations->SetCustomSlowPathCallerSaves(caller_saves);
4832 } else {
4833 // For non-Baker read barrier we have a temp-clobbering call.
4834 }
4835 }
4836 }
4837
4838 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4839 // move.
4840 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4841 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4842 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4843 codegen_->GenerateLoadClassRuntimeCall(cls);
4844 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4845 return;
4846 }
4847 DCHECK(!cls->NeedsAccessCheck());
4848
4849 Location out_loc = cls->GetLocations()->Out();
4850 Register out = OutputRegister(cls);
4851
4852 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4853 ? kWithoutReadBarrier
4854 : kCompilerReadBarrierOption;
4855 bool generate_null_check = false;
4856 switch (load_kind) {
4857 case HLoadClass::LoadKind::kReferrersClass: {
4858 DCHECK(!cls->CanCallRuntime());
4859 DCHECK(!cls->MustGenerateClinitCheck());
4860 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4861 Register current_method = InputRegisterAt(cls, 0);
4862 GenerateGcRootFieldLoad(cls,
4863 out_loc,
4864 current_method,
4865 ArtMethod::DeclaringClassOffset().Int32Value(),
4866 /* fixup_label */ nullptr,
4867 read_barrier_option);
4868 break;
4869 }
4870 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4871 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4872 // Add ADRP with its PC-relative type patch.
4873 const DexFile& dex_file = cls->GetDexFile();
4874 dex::TypeIndex type_index = cls->GetTypeIndex();
4875 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
4876 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4877 // Add ADD with its PC-relative type patch.
4878 vixl::aarch64::Label* add_label =
4879 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
4880 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4881 break;
4882 }
4883 case HLoadClass::LoadKind::kBootImageAddress: {
4884 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4885 uint32_t address = dchecked_integral_cast<uint32_t>(
4886 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
4887 DCHECK_NE(address, 0u);
4888 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4889 break;
4890 }
4891 case HLoadClass::LoadKind::kBootImageClassTable: {
4892 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
4893 // Add ADRP with its PC-relative type patch.
4894 const DexFile& dex_file = cls->GetDexFile();
4895 dex::TypeIndex type_index = cls->GetTypeIndex();
4896 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
4897 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4898 // Add LDR with its PC-relative type patch.
4899 vixl::aarch64::Label* ldr_label =
4900 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
4901 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
4902 // Extract the reference from the slot data, i.e. clear the hash bits.
4903 int32_t masked_hash = ClassTable::TableSlot::MaskHash(
4904 ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
4905 if (masked_hash != 0) {
4906 __ Sub(out.W(), out.W(), Operand(masked_hash));
4907 }
4908 break;
4909 }
4910 case HLoadClass::LoadKind::kBssEntry: {
4911 // Add ADRP with its PC-relative Class .bss entry patch.
4912 const DexFile& dex_file = cls->GetDexFile();
4913 dex::TypeIndex type_index = cls->GetTypeIndex();
4914 vixl::aarch64::Register temp = XRegisterFrom(out_loc);
4915 vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4916 codegen_->EmitAdrpPlaceholder(adrp_label, temp);
4917 // Add LDR with its PC-relative Class patch.
4918 vixl::aarch64::Label* ldr_label =
4919 codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
4920 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
4921 GenerateGcRootFieldLoad(cls,
4922 out_loc,
4923 temp,
4924 /* offset placeholder */ 0u,
4925 ldr_label,
4926 read_barrier_option);
4927 generate_null_check = true;
4928 break;
4929 }
4930 case HLoadClass::LoadKind::kJitTableAddress: {
4931 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4932 cls->GetTypeIndex(),
4933 cls->GetClass()));
4934 GenerateGcRootFieldLoad(cls,
4935 out_loc,
4936 out.X(),
4937 /* offset */ 0,
4938 /* fixup_label */ nullptr,
4939 read_barrier_option);
4940 break;
4941 }
4942 case HLoadClass::LoadKind::kRuntimeCall:
4943 case HLoadClass::LoadKind::kInvalid:
4944 LOG(FATAL) << "UNREACHABLE";
4945 UNREACHABLE();
4946 }
4947
4948 bool do_clinit = cls->MustGenerateClinitCheck();
4949 if (generate_null_check || do_clinit) {
4950 DCHECK(cls->CanCallRuntime());
4951 SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
4952 cls, cls, cls->GetDexPc(), do_clinit);
4953 codegen_->AddSlowPath(slow_path);
4954 if (generate_null_check) {
4955 __ Cbz(out, slow_path->GetEntryLabel());
4956 }
4957 if (cls->MustGenerateClinitCheck()) {
4958 GenerateClassInitializationCheck(slow_path, out);
4959 } else {
4960 __ Bind(slow_path->GetExitLabel());
4961 }
4962 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
4963 }
4964 }
4965
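// Returns a MemOperand addressing the pending-exception field of the current
// Thread (`tr` is the reserved ART thread register).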
4966 static MemOperand GetExceptionTlsAddress() {
4967 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
4968 }
4969
4970 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
4971 LocationSummary* locations =
4972 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
4973 locations->SetOut(Location::RequiresRegister());
4974 }
4975
4976 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
4977 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
4978 }
4979
4980 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
4981 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
4982 }
4983
4984 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
4985 __ Str(wzr, GetExceptionTlsAddress());
4986 }
4987
4988 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
4989 HLoadString::LoadKind desired_string_load_kind) {
4990 switch (desired_string_load_kind) {
4991 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
4992 case HLoadString::LoadKind::kBootImageInternTable:
4993 case HLoadString::LoadKind::kBssEntry:
4994 DCHECK(!Runtime::Current()->UseJitCompilation());
4995 break;
4996 case HLoadString::LoadKind::kJitTableAddress:
4997 DCHECK(Runtime::Current()->UseJitCompilation());
4998 break;
4999 case HLoadString::LoadKind::kBootImageAddress:
5000 case HLoadString::LoadKind::kRuntimeCall:
5001 break;
5002 }
5003 return desired_string_load_kind;
5004 }
5005
5006 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5007 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5008 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5009 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5010 InvokeRuntimeCallingConvention calling_convention;
5011 locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5012 } else {
5013 locations->SetOut(Location::RequiresRegister());
5014 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5015 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5016 // Rely on the pResolveString and marking to save everything we need.
5017 RegisterSet caller_saves = RegisterSet::Empty();
5018 InvokeRuntimeCallingConvention calling_convention;
5019 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
5020 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
5021 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
5022 DataType::Type::kReference).GetCode());
5023 locations->SetCustomSlowPathCallerSaves(caller_saves);
5024 } else {
5025 // For non-Baker read barrier we have a temp-clobbering call.
5026 }
5027 }
5028 }
5029 }
5030
5031 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5032 // move.
5033 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5034 Register out = OutputRegister(load);
5035 Location out_loc = load->GetLocations()->Out();
5036
5037 switch (load->GetLoadKind()) {
5038 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5039 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5040 // Add ADRP with its PC-relative String patch.
5041 const DexFile& dex_file = load->GetDexFile();
5042 const dex::StringIndex string_index = load->GetStringIndex();
5043 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5044 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5045 // Add ADD with its PC-relative String patch.
5046 vixl::aarch64::Label* add_label =
5047 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5048 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5049 return;
5050 }
5051 case HLoadString::LoadKind::kBootImageAddress: {
5052 uint32_t address = dchecked_integral_cast<uint32_t>(
5053 reinterpret_cast<uintptr_t>(load->GetString().Get()));
5054 DCHECK_NE(address, 0u);
5055 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5056 return;
5057 }
5058 case HLoadString::LoadKind::kBootImageInternTable: {
5059 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5060 // Add ADRP with its PC-relative String patch.
5061 const DexFile& dex_file = load->GetDexFile();
5062 const dex::StringIndex string_index = load->GetStringIndex();
5063 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5064 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5065 // Add LDR with its PC-relative String patch.
5066 vixl::aarch64::Label* ldr_label =
5067 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5068 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5069 return;
5070 }
5071 case HLoadString::LoadKind::kBssEntry: {
5072 // Add ADRP with its PC-relative String .bss entry patch.
5073 const DexFile& dex_file = load->GetDexFile();
5074 const dex::StringIndex string_index = load->GetStringIndex();
5075 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5076 Register temp = XRegisterFrom(out_loc);
5077 vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5078 codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5079 // Add LDR with its .bss entry String patch.
5080 vixl::aarch64::Label* ldr_label =
5081 codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5082 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
5083 GenerateGcRootFieldLoad(load,
5084 out_loc,
5085 temp,
5086 /* offset placeholder */ 0u,
5087 ldr_label,
5088 kCompilerReadBarrierOption);
5089 SlowPathCodeARM64* slow_path =
5090 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5091 codegen_->AddSlowPath(slow_path);
5092 __ Cbz(out.X(), slow_path->GetEntryLabel());
5093 __ Bind(slow_path->GetExitLabel());
5094 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5095 return;
5096 }
5097 case HLoadString::LoadKind::kJitTableAddress: {
5098 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5099 load->GetStringIndex(),
5100 load->GetString()));
5101 GenerateGcRootFieldLoad(load,
5102 out_loc,
5103 out.X(),
5104 /* offset */ 0,
5105 /* fixup_label */ nullptr,
5106 kCompilerReadBarrierOption);
5107 return;
5108 }
5109 default:
5110 break;
5111 }
5112
5113 // TODO: Re-add the compiler code to do string dex cache lookup again.
5114 InvokeRuntimeCallingConvention calling_convention;
5115 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5116 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5117 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5118 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5119 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5120 }
5121
5122 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5123 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5124 locations->SetOut(Location::ConstantLocation(constant));
5125 }
5126
5127 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5128 // Will be generated at use site.
5129 }
5130
5131 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5132 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5133 instruction, LocationSummary::kCallOnMainOnly);
5134 InvokeRuntimeCallingConvention calling_convention;
5135 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5136 }
5137
5138 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5139 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5140 instruction,
5141 instruction->GetDexPc());
5142 if (instruction->IsEnter()) {
5143 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5144 } else {
5145 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5146 }
5147 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5148 }
5149
5150 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5151 LocationSummary* locations =
5152 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5153 switch (mul->GetResultType()) {
5154 case DataType::Type::kInt32:
5155 case DataType::Type::kInt64:
5156 locations->SetInAt(0, Location::RequiresRegister());
5157 locations->SetInAt(1, Location::RequiresRegister());
5158 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5159 break;
5160
5161 case DataType::Type::kFloat32:
5162 case DataType::Type::kFloat64:
5163 locations->SetInAt(0, Location::RequiresFpuRegister());
5164 locations->SetInAt(1, Location::RequiresFpuRegister());
5165 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5166 break;
5167
5168 default:
5169 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5170 }
5171 }
5172
5173 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5174 switch (mul->GetResultType()) {
5175 case DataType::Type::kInt32:
5176 case DataType::Type::kInt64:
5177 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5178 break;
5179
5180 case DataType::Type::kFloat32:
5181 case DataType::Type::kFloat64:
5182 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5183 break;
5184
5185 default:
5186 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5187 }
5188 }
5189
5190 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5191 LocationSummary* locations =
5192 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5193 switch (neg->GetResultType()) {
5194 case DataType::Type::kInt32:
5195 case DataType::Type::kInt64:
5196 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5197 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5198 break;
5199
5200 case DataType::Type::kFloat32:
5201 case DataType::Type::kFloat64:
5202 locations->SetInAt(0, Location::RequiresFpuRegister());
5203 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5204 break;
5205
5206 default:
5207 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5208 }
5209 }
5210
5211 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5212 switch (neg->GetResultType()) {
5213 case DataType::Type::kInt32:
5214 case DataType::Type::kInt64:
5215 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5216 break;
5217
5218 case DataType::Type::kFloat32:
5219 case DataType::Type::kFloat64:
5220 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5221 break;
5222
5223 default:
5224 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5225 }
5226 }
5227
5228 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5229 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5230 instruction, LocationSummary::kCallOnMainOnly);
5231 InvokeRuntimeCallingConvention calling_convention;
5232 locations->SetOut(LocationFrom(x0));
5233 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5234 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5235 }
5236
5237 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5238 // Note: if heap poisoning is enabled, the entry point takes care
5239 // of poisoning the reference.
5240 QuickEntrypointEnum entrypoint =
5241 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5242 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5243 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5244 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5245 }
5246
5247 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5248 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5249 instruction, LocationSummary::kCallOnMainOnly);
5250 InvokeRuntimeCallingConvention calling_convention;
5251 if (instruction->IsStringAlloc()) {
5252 locations->AddTemp(LocationFrom(kArtMethodRegister));
5253 } else {
5254 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5255 }
5256 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5257 }
5258
5259 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5260 // Note: if heap poisoning is enabled, the entry point takes care
5261 // of poisoning the reference.
5262 if (instruction->IsStringAlloc()) {
5263 // String is allocated through StringFactory. Call NewEmptyString entry point.
5264 Location temp = instruction->GetLocations()->GetTemp(0);
5265 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5266 __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5267 __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5268
5269 {
5270 // Ensure the pc position is recorded immediately after the `blr` instruction.
5271 ExactAssemblyScope eas(GetVIXLAssembler(),
5272 kInstructionSize,
5273 CodeBufferCheckScope::kExactSize);
5274 __ blr(lr);
5275 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5276 }
5277 } else {
5278 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5279 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5280 }
5281 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5282 }
5283
5284 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5285 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5286 locations->SetInAt(0, Location::RequiresRegister());
5287 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5288 }
5289
5290 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5291 switch (instruction->GetResultType()) {
5292 case DataType::Type::kInt32:
5293 case DataType::Type::kInt64:
5294 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5295 break;
5296
5297 default:
5298 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5299 }
5300 }
5301
5302 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5303 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5304 locations->SetInAt(0, Location::RequiresRegister());
5305 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5306 }
5307
5308 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5309 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5310 }
5311
5312 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5313 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5314 locations->SetInAt(0, Location::RequiresRegister());
5315 }
5316
5317 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5318 if (CanMoveNullCheckToUser(instruction)) {
5319 return;
5320 }
5321 {
5322 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5323 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5324 Location obj = instruction->GetLocations()->InAt(0);
5325 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5326 RecordPcInfo(instruction, instruction->GetDexPc());
5327 }
5328 }
5329
5330 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5331 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5332 AddSlowPath(slow_path);
5333
5334 LocationSummary* locations = instruction->GetLocations();
5335 Location obj = locations->InAt(0);
5336
5337 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5338 }
5339
5340 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5341 codegen_->GenerateNullCheck(instruction);
5342 }
5343
5344 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5345 HandleBinaryOp(instruction);
5346 }
5347
5348 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5349 HandleBinaryOp(instruction);
5350 }
5351
5352 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5353 LOG(FATAL) << "Unreachable";
5354 }
5355
5356 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5357 if (instruction->GetNext()->IsSuspendCheck() &&
5358 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5359 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5360 // The back edge will generate the suspend check.
5361 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5362 }
5363
5364 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5365 }
5366
5367 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5368 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5369 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5370 if (location.IsStackSlot()) {
5371 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5372 } else if (location.IsDoubleStackSlot()) {
5373 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5374 }
5375 locations->SetOut(location);
5376 }
5377
5378 void InstructionCodeGeneratorARM64::VisitParameterValue(
5379 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5380 // Nothing to do, the parameter is already at its location.
5381 }
5382
5383 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5384 LocationSummary* locations =
5385 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5386 locations->SetOut(LocationFrom(kArtMethodRegister));
5387 }
5388
5389 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5390 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5391 // Nothing to do, the method is already at its location.
5392 }
5393
5394 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5395 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5396 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5397 locations->SetInAt(i, Location::Any());
5398 }
5399 locations->SetOut(Location::Any());
5400 }
5401
5402 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5403 LOG(FATAL) << "Unreachable";
5404 }
5405
5406 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5407 DataType::Type type = rem->GetResultType();
5408 LocationSummary::CallKind call_kind =
5409 DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5410 : LocationSummary::kNoCall;
5411 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5412
5413 switch (type) {
5414 case DataType::Type::kInt32:
5415 case DataType::Type::kInt64:
5416 locations->SetInAt(0, Location::RequiresRegister());
5417 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5418 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5419 break;
5420
5421 case DataType::Type::kFloat32:
5422 case DataType::Type::kFloat64: {
5423 InvokeRuntimeCallingConvention calling_convention;
5424 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5425 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5426 locations->SetOut(calling_convention.GetReturnLocation(type));
5427
5428 break;
5429 }
5430
5431 default:
5432 LOG(FATAL) << "Unexpected rem type " << type;
5433 }
5434 }
5435
5436 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5437 DataType::Type type = rem->GetResultType();
5438
5439 switch (type) {
5440 case DataType::Type::kInt32:
5441 case DataType::Type::kInt64: {
5442 GenerateDivRemIntegral(rem);
5443 break;
5444 }
5445
5446 case DataType::Type::kFloat32:
5447 case DataType::Type::kFloat64: {
5448 QuickEntrypointEnum entrypoint =
5449 (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5450 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5451 if (type == DataType::Type::kFloat32) {
5452 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5453 } else {
5454 CheckEntrypointTypes<kQuickFmod, double, double, double>();
5455 }
5456 break;
5457 }
5458
5459 default:
5460 LOG(FATAL) << "Unexpected rem type " << type;
5461 UNREACHABLE();
5462 }
5463 }
5464
5465 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5466 constructor_fence->SetLocations(nullptr);
5467 }
5468
5469 void InstructionCodeGeneratorARM64::VisitConstructorFence(
5470 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
5471 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
5472 }
5473
5474 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5475 memory_barrier->SetLocations(nullptr);
5476 }
5477
5478 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5479 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5480 }
5481
5482 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5483 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5484 DataType::Type return_type = instruction->InputAt(0)->GetType();
5485 locations->SetInAt(0, ARM64ReturnLocation(return_type));
5486 }
5487
5488 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5489 codegen_->GenerateFrameExit();
5490 }
5491
5492 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5493 instruction->SetLocations(nullptr);
5494 }
5495
5496 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5497 codegen_->GenerateFrameExit();
5498 }
5499
5500 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5501 HandleBinaryOp(ror);
5502 }
5503
5504 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5505 HandleBinaryOp(ror);
5506 }
5507
5508 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5509 HandleShift(shl);
5510 }
5511
5512 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5513 HandleShift(shl);
5514 }
5515
5516 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5517 HandleShift(shr);
5518 }
5519
5520 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5521 HandleShift(shr);
5522 }
5523
5524 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5525 HandleBinaryOp(instruction);
5526 }
5527
5528 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5529 HandleBinaryOp(instruction);
5530 }
5531
5532 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5533 HandleFieldGet(instruction, instruction->GetFieldInfo());
5534 }
5535
5536 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5537 HandleFieldGet(instruction, instruction->GetFieldInfo());
5538 }
5539
5540 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5541 HandleFieldSet(instruction);
5542 }
5543
5544 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5545 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5546 }
5547
5548 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5549 HUnresolvedInstanceFieldGet* instruction) {
5550 FieldAccessCallingConventionARM64 calling_convention;
5551 codegen_->CreateUnresolvedFieldLocationSummary(
5552 instruction, instruction->GetFieldType(), calling_convention);
5553 }
5554
5555 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5556 HUnresolvedInstanceFieldGet* instruction) {
5557 FieldAccessCallingConventionARM64 calling_convention;
5558 codegen_->GenerateUnresolvedFieldAccess(instruction,
5559 instruction->GetFieldType(),
5560 instruction->GetFieldIndex(),
5561 instruction->GetDexPc(),
5562 calling_convention);
5563 }
5564
5565 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5566 HUnresolvedInstanceFieldSet* instruction) {
5567 FieldAccessCallingConventionARM64 calling_convention;
5568 codegen_->CreateUnresolvedFieldLocationSummary(
5569 instruction, instruction->GetFieldType(), calling_convention);
5570 }
5571
5572 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5573 HUnresolvedInstanceFieldSet* instruction) {
5574 FieldAccessCallingConventionARM64 calling_convention;
5575 codegen_->GenerateUnresolvedFieldAccess(instruction,
5576 instruction->GetFieldType(),
5577 instruction->GetFieldIndex(),
5578 instruction->GetDexPc(),
5579 calling_convention);
5580 }
5581
5582 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5583 HUnresolvedStaticFieldGet* instruction) {
5584 FieldAccessCallingConventionARM64 calling_convention;
5585 codegen_->CreateUnresolvedFieldLocationSummary(
5586 instruction, instruction->GetFieldType(), calling_convention);
5587 }
5588
5589 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5590 HUnresolvedStaticFieldGet* instruction) {
5591 FieldAccessCallingConventionARM64 calling_convention;
5592 codegen_->GenerateUnresolvedFieldAccess(instruction,
5593 instruction->GetFieldType(),
5594 instruction->GetFieldIndex(),
5595 instruction->GetDexPc(),
5596 calling_convention);
5597 }
5598
5599 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5600 HUnresolvedStaticFieldSet* instruction) {
5601 FieldAccessCallingConventionARM64 calling_convention;
5602 codegen_->CreateUnresolvedFieldLocationSummary(
5603 instruction, instruction->GetFieldType(), calling_convention);
5604 }
5605
5606 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5607 HUnresolvedStaticFieldSet* instruction) {
5608 FieldAccessCallingConventionARM64 calling_convention;
5609 codegen_->GenerateUnresolvedFieldAccess(instruction,
5610 instruction->GetFieldType(),
5611 instruction->GetFieldIndex(),
5612 instruction->GetDexPc(),
5613 calling_convention);
5614 }
5615
5616 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5617 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5618 instruction, LocationSummary::kCallOnSlowPath);
5619 // In suspend check slow path, usually there are no caller-save registers at all.
5620 // If SIMD instructions are present, however, we force spilling all live SIMD
5621 // registers in full width (since the runtime only saves/restores lower part).
5622 locations->SetCustomSlowPathCallerSaves(
5623 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5624 }
5625
5626 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5627 HBasicBlock* block = instruction->GetBlock();
5628 if (block->GetLoopInformation() != nullptr) {
5629 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5630 // The back edge will generate the suspend check.
5631 return;
5632 }
5633 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5634 // The goto will generate the suspend check.
5635 return;
5636 }
5637 GenerateSuspendCheck(instruction, nullptr);
5638 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
5639 }
5640
5641 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5642 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5643 instruction, LocationSummary::kCallOnMainOnly);
5644 InvokeRuntimeCallingConvention calling_convention;
5645 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5646 }
5647
5648 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5649 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5650 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5651 }
5652
5653 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5654 LocationSummary* locations =
5655 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
5656 DataType::Type input_type = conversion->GetInputType();
5657 DataType::Type result_type = conversion->GetResultType();
5658 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5659 << input_type << " -> " << result_type;
5660 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
5661 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
5662 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5663 }
5664
5665 if (DataType::IsFloatingPointType(input_type)) {
5666 locations->SetInAt(0, Location::RequiresFpuRegister());
5667 } else {
5668 locations->SetInAt(0, Location::RequiresRegister());
5669 }
5670
5671 if (DataType::IsFloatingPointType(result_type)) {
5672 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5673 } else {
5674 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5675 }
5676 }
5677
5678 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5679 DataType::Type result_type = conversion->GetResultType();
5680 DataType::Type input_type = conversion->GetInputType();
5681
5682 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5683 << input_type << " -> " << result_type;
5684
5685 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
5686 int result_size = DataType::Size(result_type);
5687 int input_size = DataType::Size(input_type);
5688 int min_size = std::min(result_size, input_size);
5689 Register output = OutputRegister(conversion);
5690 Register source = InputRegisterAt(conversion, 0);
5691 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
5692 // 'int' values are used directly as W registers, discarding the top
5693 // bits, so we don't need to sign-extend and can just perform a move.
5694 // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5695 // top 32 bits of the target register. We theoretically could leave those
5696 // bits unchanged, but we would have to make sure that no code uses a
5697 // 32bit input value as a 64bit value assuming that the top 32 bits are
5698 // zero.
5699 __ Mov(output.W(), source.W());
5700 } else if (DataType::IsUnsignedType(result_type) ||
5701 (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
5702 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
5703 } else {
5704 __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5705 }
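// For example (illustrative): a long-to-short conversion takes the final `else` branch and
// emits `sbfx w_out, w_src, #0, #16`, while an int-to-char (unsigned 16-bit) conversion
// takes the Ubfx branch and emits `ubfx w_out, w_src, #0, #16`.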
5706 } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
5707 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5708 } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
5709 CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
5710 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5711 } else if (DataType::IsFloatingPointType(result_type) &&
5712 DataType::IsFloatingPointType(input_type)) {
5713 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5714 } else {
5715 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5716 << " to " << result_type;
5717 }
5718 }
5719
5720 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5721 HandleShift(ushr);
5722 }
5723
5724 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5725 HandleShift(ushr);
5726 }
5727
5728 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5729 HandleBinaryOp(instruction);
5730 }
5731
5732 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5733 HandleBinaryOp(instruction);
5734 }
5735
5736 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5737 // Nothing to do, this should be removed during prepare for register allocator.
5738 LOG(FATAL) << "Unreachable";
5739 }
5740
5741 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5742 // Nothing to do, this should be removed during prepare for register allocator.
5743 LOG(FATAL) << "Unreachable";
5744 }
5745
5746 // Simple implementation of packed switch - generate cascaded compare/jumps.
5747 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5748 LocationSummary* locations =
5749 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5750 locations->SetInAt(0, Location::RequiresRegister());
5751 }
5752
5753 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5754 int32_t lower_bound = switch_instr->GetStartValue();
5755 uint32_t num_entries = switch_instr->GetNumEntries();
5756 Register value_reg = InputRegisterAt(switch_instr, 0);
5757 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5758
5759 // Roughly set 16 as max average assemblies generated per HIR in a graph.
5760 static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5761 // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the graph to
5762 // make sure we don't emit it if the target may run out of range.
5763 // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5764 // ranges and emit the tables only as required.
5765 static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
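// With kInstructionSize == 4 bytes this threshold works out to 1 MB / 64 B, i.e. roughly
// 16K HIR instructions before the jump table could fall out of ADR range.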
5766
5767 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5768 // Current instruction id is an upper bound of the number of HIRs in the graph.
5769 GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5770 // Create a series of compare/jumps.
5771 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5772 Register temp = temps.AcquireW();
5773 __ Subs(temp, value_reg, Operand(lower_bound));
5774
5775 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5776 // Jump to successors[0] if value == lower_bound.
5777 __ B(eq, codegen_->GetLabelOf(successors[0]));
5778 int32_t last_index = 0;
5779 for (; num_entries - last_index > 2; last_index += 2) {
5780 __ Subs(temp, temp, Operand(2));
5781 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5782 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5783 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5784 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5785 }
5786 if (num_entries - last_index == 2) {
5787 // The last missing case_value.
5788 __ Cmp(temp, Operand(1));
5789 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5790 }
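// For example (illustrative), with lower_bound == 10 and num_entries == 4 the sequence above
// is roughly:
//   subs w_tmp, w_value, #10 ; b.eq case_10
//   subs w_tmp, w_tmp, #2    ; b.lo case_11 ; b.eq case_12
//   cmp  w_tmp, #1           ; b.eq case_13
// followed by the fall-through/branch to the default block below.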
5791
5792 // And the default for any other value.
5793 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5794 __ B(codegen_->GetLabelOf(default_block));
5795 }
5796 } else {
5797 JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5798
5799 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5800
5801 // Below instructions should use at most one blocked register. Since there are two blocked
5802 // registers, we are free to block one.
5803 Register temp_w = temps.AcquireW();
5804 Register index;
5805 // Remove the bias.
5806 if (lower_bound != 0) {
5807 index = temp_w;
5808 __ Sub(index, value_reg, Operand(lower_bound));
5809 } else {
5810 index = value_reg;
5811 }
5812
5813 // Jump to default block if index is out of the range.
5814 __ Cmp(index, Operand(num_entries));
5815 __ B(hs, codegen_->GetLabelOf(default_block));
5816
5817 // In current VIXL implementation, it won't require any blocked registers to encode the
5818 // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
5819 // register pressure.
5820 Register table_base = temps.AcquireX();
5821 // Load jump offset from the table.
5822 __ Adr(table_base, jump_table->GetTableStartLabel());
5823 Register jump_offset = temp_w;
5824 __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5825
5826 // Jump to the target block by branching to table_base (PC-relative) + offset.
5827 Register target_address = table_base;
5828 __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5829 __ Br(target_address);
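// Altogether the jump-table path emits an optional Sub (when lower_bound != 0), Cmp, B.hs,
// Adr, Ldr, Add and Br, plus one 32-bit table entry per case (hence the UXTW #2 scaling),
// each entry holding the offset of its target relative to the table start (hence the SXTW).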
5830 }
5831 }
5832
5833 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5834 HInstruction* instruction,
5835 Location out,
5836 uint32_t offset,
5837 Location maybe_temp,
5838 ReadBarrierOption read_barrier_option) {
5839 DataType::Type type = DataType::Type::kReference;
5840 Register out_reg = RegisterFrom(out, type);
5841 if (read_barrier_option == kWithReadBarrier) {
5842 CHECK(kEmitCompilerReadBarrier);
5843 if (kUseBakerReadBarrier) {
5844 // Load with fast path based Baker's read barrier.
5845 // /* HeapReference<Object> */ out = *(out + offset)
5846 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5847 out,
5848 out_reg,
5849 offset,
5850 maybe_temp,
5851 /* needs_null_check */ false,
5852 /* use_load_acquire */ false);
5853 } else {
5854 // Load with slow path based read barrier.
5855 // Save the value of `out` into `maybe_temp` before overwriting it
5856 // in the following move operation, as we will need it for the
5857 // read barrier below.
5858 Register temp_reg = RegisterFrom(maybe_temp, type);
5859 __ Mov(temp_reg, out_reg);
5860 // /* HeapReference<Object> */ out = *(out + offset)
5861 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5862 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5863 }
5864 } else {
5865 // Plain load with no read barrier.
5866 // /* HeapReference<Object> */ out = *(out + offset)
5867 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5868 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5869 }
5870 }
5871
5872 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5873 HInstruction* instruction,
5874 Location out,
5875 Location obj,
5876 uint32_t offset,
5877 Location maybe_temp,
5878 ReadBarrierOption read_barrier_option) {
5879 DataType::Type type = DataType::Type::kReference;
5880 Register out_reg = RegisterFrom(out, type);
5881 Register obj_reg = RegisterFrom(obj, type);
5882 if (read_barrier_option == kWithReadBarrier) {
5883 CHECK(kEmitCompilerReadBarrier);
5884 if (kUseBakerReadBarrier) {
5885 // Load with fast path based Baker's read barrier.
5886 // /* HeapReference<Object> */ out = *(obj + offset)
5887 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5888 out,
5889 obj_reg,
5890 offset,
5891 maybe_temp,
5892 /* needs_null_check */ false,
5893 /* use_load_acquire */ false);
5894 } else {
5895 // Load with slow path based read barrier.
5896 // /* HeapReference<Object> */ out = *(obj + offset)
5897 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5898 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5899 }
5900 } else {
5901 // Plain load with no read barrier.
5902 // /* HeapReference<Object> */ out = *(obj + offset)
5903 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5904 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5905 }
5906 }
5907
5908 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
5909 HInstruction* instruction,
5910 Location root,
5911 Register obj,
5912 uint32_t offset,
5913 vixl::aarch64::Label* fixup_label,
5914 ReadBarrierOption read_barrier_option) {
5915 DCHECK(fixup_label == nullptr || offset == 0u);
5916 Register root_reg = RegisterFrom(root, DataType::Type::kReference);
5917 if (read_barrier_option == kWithReadBarrier) {
5918 DCHECK(kEmitCompilerReadBarrier);
5919 if (kUseBakerReadBarrier) {
5920 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5921 // Baker's read barrier are used.
5922 if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
5923 !Runtime::Current()->UseJitCompilation()) {
5924 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
5925 // the Marking Register) to decide whether we need to enter
5926 // the slow path to mark the GC root.
5927 //
5928 // We use link-time generated thunks for the slow path. That thunk
5929 // checks the reference and jumps to the entrypoint if needed.
5930 //
5931 // lr = &return_address;
5932 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
5933 // if (mr) { // Thread::Current()->GetIsGcMarking()
5934 // goto gc_root_thunk<root_reg>(lr)
5935 // }
5936 // return_address:
5937
5938 UseScratchRegisterScope temps(GetVIXLAssembler());
5939 DCHECK(temps.IsAvailable(ip0));
5940 DCHECK(temps.IsAvailable(ip1));
5941 temps.Exclude(ip0, ip1);
5942 uint32_t custom_data =
5943 linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
5944 vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
5945
5946 EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5947 vixl::aarch64::Label return_address;
5948 __ adr(lr, &return_address);
5949 if (fixup_label != nullptr) {
5950 __ Bind(fixup_label);
5951 }
5952 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5953 "GC root LDR must be 2 instruction (8B) before the return address label.");
5954 __ ldr(root_reg, MemOperand(obj.X(), offset));
5955 __ Bind(cbnz_label);
5956 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
5957 __ Bind(&return_address);
5958 } else {
5959 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
5960 // the Marking Register) to decide whether we need to enter
5961 // the slow path to mark the GC root.
5962 //
5963 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
5964 // if (mr) { // Thread::Current()->GetIsGcMarking()
5965 // // Slow path.
5966 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5967 // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
5968 // }
5969
5970 // Slow path marking the GC root `root`. The entrypoint will
5971 // be loaded by the slow path code.
5972 SlowPathCodeARM64* slow_path =
5973 new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
5974 codegen_->AddSlowPath(slow_path);
5975
5976 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
5977 if (fixup_label == nullptr) {
5978 __ Ldr(root_reg, MemOperand(obj, offset));
5979 } else {
5980 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
5981 }
5982 static_assert(
5983 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
5984 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
5985 "have different sizes.");
5986 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
5987 "art::mirror::CompressedReference<mirror::Object> and int32_t "
5988 "have different sizes.");
5989
5990 __ Cbnz(mr, slow_path->GetEntryLabel());
5991 __ Bind(slow_path->GetExitLabel());
5992 }
5993 } else {
5994 // GC root loaded through a slow path for read barriers other
5995 // than Baker's.
5996 // /* GcRoot<mirror::Object>* */ root = obj + offset
5997 if (fixup_label == nullptr) {
5998 __ Add(root_reg.X(), obj.X(), offset);
5999 } else {
6000 codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6001 }
6002 // /* mirror::Object* */ root = root->Read()
6003 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6004 }
6005 } else {
6006 // Plain GC root load with no read barrier.
6007 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6008 if (fixup_label == nullptr) {
6009 __ Ldr(root_reg, MemOperand(obj, offset));
6010 } else {
6011 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6012 }
6013 // Note that GC roots are not affected by heap poisoning, thus we
6014 // do not have to unpoison `root_reg` here.
6015 }
6016 codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
6017 }
6018
6019 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6020 Location ref,
6021 Register obj,
6022 uint32_t offset,
6023 Location maybe_temp,
6024 bool needs_null_check,
6025 bool use_load_acquire) {
6026 DCHECK(kEmitCompilerReadBarrier);
6027 DCHECK(kUseBakerReadBarrier);
6028
6029 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
6030 !use_load_acquire &&
6031 !Runtime::Current()->UseJitCompilation()) {
6032 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6033 // Marking Register) to decide whether we need to enter the slow
6034 // path to mark the reference. Then, in the slow path, check the
6035 // gray bit in the lock word of the reference's holder (`obj`) to
6036 // decide whether to mark `ref` or not.
6037 //
6038 // We use link-time generated thunks for the slow path. That thunk checks
6039 // the holder and jumps to the entrypoint if needed. If the holder is not
6040 // gray, it creates a fake dependency and returns to the LDR instruction.
6041 //
6042 // lr = &gray_return_address;
6043 // if (mr) { // Thread::Current()->GetIsGcMarking()
6044 // goto field_thunk<holder_reg, base_reg>(lr)
6045 // }
6046 // not_gray_return_address:
6047 // // Original reference load. If the offset is too large to fit
6048 // // into LDR, we use an adjusted base register here.
6049 // HeapReference<mirror::Object> reference = *(obj+offset);
6050 // gray_return_address:
6051
6052 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6053 Register base = obj;
6054 if (offset >= kReferenceLoadMinFarOffset) {
6055 DCHECK(maybe_temp.IsRegister());
6056 base = WRegisterFrom(maybe_temp);
6057 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6058 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6059 offset &= (kReferenceLoadMinFarOffset - 1u);
6060 }
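// For example, with offset == 0x4123 the code above rebases to base = obj + 0x4000 and
// reduces offset to 0x123, so the reference load below is still a single LDR with an
// immediate offset.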
6061 UseScratchRegisterScope temps(GetVIXLAssembler());
6062 DCHECK(temps.IsAvailable(ip0));
6063 DCHECK(temps.IsAvailable(ip1));
6064 temps.Exclude(ip0, ip1);
6065 uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
6066 base.GetCode(),
6067 obj.GetCode());
6068 vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
6069
6070 {
6071 EmissionCheckScope guard(GetVIXLAssembler(),
6072 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6073 vixl::aarch64::Label return_address;
6074 __ adr(lr, &return_address);
6075 __ Bind(cbnz_label);
6076 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
6077 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6078 "Field LDR must be 1 instruction (4B) before the return address label; "
6079 " 2 instructions (8B) for heap poisoning.");
6080 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6081 __ ldr(ref_reg, MemOperand(base.X(), offset));
6082 if (needs_null_check) {
6083 MaybeRecordImplicitNullCheck(instruction);
6084 }
6085 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6086 __ Bind(&return_address);
6087 }
6088 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
6089 return;
6090 }
6091
6092 // /* HeapReference<Object> */ ref = *(obj + offset)
6093 Register temp = WRegisterFrom(maybe_temp);
6094 Location no_index = Location::NoLocation();
6095 size_t no_scale_factor = 0u;
6096 GenerateReferenceLoadWithBakerReadBarrier(instruction,
6097 ref,
6098 obj,
6099 offset,
6100 no_index,
6101 no_scale_factor,
6102 temp,
6103 needs_null_check,
6104 use_load_acquire);
6105 }
6106
6107 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6108 Location ref,
6109 Register obj,
6110 uint32_t data_offset,
6111 Location index,
6112 Register temp,
6113 bool needs_null_check) {
6114 DCHECK(kEmitCompilerReadBarrier);
6115 DCHECK(kUseBakerReadBarrier);
6116
6117 static_assert(
6118 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6119 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6120 size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6121
6122 if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
6123 !Runtime::Current()->UseJitCompilation()) {
6124 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6125 // Marking Register) to decide whether we need to enter the slow
6126 // path to mark the reference. Then, in the slow path, check the
6127 // gray bit in the lock word of the reference's holder (`obj`) to
6128 // decide whether to mark `ref` or not.
6129 //
6130 // We use link-time generated thunks for the slow path. That thunk checks
6131 // the holder and jumps to the entrypoint if needed. If the holder is not
6132 // gray, it creates a fake dependency and returns to the LDR instruction.
6133 //
6134 // lr = &gray_return_address;
6135 // if (mr) { // Thread::Current()->GetIsGcMarking()
6136 // goto array_thunk<base_reg>(lr)
6137 // }
6138 // not_gray_return_address:
6139 // // Original reference load. If the offset is too large to fit
6140 // // into LDR, we use an adjusted base register here.
6141 // HeapReference<mirror::Object> reference = data[index];
6142 // gray_return_address:
6143
6144 DCHECK(index.IsValid());
6145 Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
6146 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6147
6148 UseScratchRegisterScope temps(GetVIXLAssembler());
6149 DCHECK(temps.IsAvailable(ip0));
6150 DCHECK(temps.IsAvailable(ip1));
6151 temps.Exclude(ip0, ip1);
6152 uint32_t custom_data =
6153 linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
6154 vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
6155
6156 __ Add(temp.X(), obj.X(), Operand(data_offset));
6157 {
6158 EmissionCheckScope guard(GetVIXLAssembler(),
6159 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6160 vixl::aarch64::Label return_address;
6161 __ adr(lr, &return_address);
6162 __ Bind(cbnz_label);
6163 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
6164 static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6165 "Array LDR must be 1 instruction (4B) before the return address label; "
6166 " 2 instructions (8B) for heap poisoning.");
6167 __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6168 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
6169 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6170 __ Bind(&return_address);
6171 }
6172 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
6173 return;
6174 }
6175
6176 // Array cells are never volatile variables, therefore array loads
6177 // never use Load-Acquire instructions on ARM64.
6178 const bool use_load_acquire = false;
6179
6180 // /* HeapReference<Object> */ ref =
6181 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6182 GenerateReferenceLoadWithBakerReadBarrier(instruction,
6183 ref,
6184 obj,
6185 data_offset,
6186 index,
6187 scale_factor,
6188 temp,
6189 needs_null_check,
6190 use_load_acquire);
6191 }
6192
6193 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6194 Location ref,
6195 Register obj,
6196 uint32_t offset,
6197 Location index,
6198 size_t scale_factor,
6199 Register temp,
6200 bool needs_null_check,
6201 bool use_load_acquire) {
6202 DCHECK(kEmitCompilerReadBarrier);
6203 DCHECK(kUseBakerReadBarrier);
6204 // If we are emitting an array load, we should not be using a
6205 // Load Acquire instruction. In other words:
6206 // `instruction->IsArrayGet()` => `!use_load_acquire`.
6207 DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
6208
6209 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6210 // Marking Register) to decide whether we need to enter the slow
6211 // path to mark the reference. Then, in the slow path, check the
6212 // gray bit in the lock word of the reference's holder (`obj`) to
6213 // decide whether to mark `ref` or not.
6214 //
6215 // if (mr) { // Thread::Current()->GetIsGcMarking()
6216 // // Slow path.
6217 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6218 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6219 // HeapReference<mirror::Object> ref = *src; // Original reference load.
6220 // bool is_gray = (rb_state == ReadBarrier::GrayState());
6221 // if (is_gray) {
6222 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6223 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
6224 // }
6225 // } else {
6226 // HeapReference<mirror::Object> ref = *src; // Original reference load.
6227 // }
6228
6229 // Slow path marking the object `ref` when the GC is marking. The
6230 // entrypoint will be loaded by the slow path code.
6231 SlowPathCodeARM64* slow_path =
6232 new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
6233 instruction,
6234 ref,
6235 obj,
6236 offset,
6237 index,
6238 scale_factor,
6239 needs_null_check,
6240 use_load_acquire,
6241 temp);
6242 AddSlowPath(slow_path);
6243
6244 __ Cbnz(mr, slow_path->GetEntryLabel());
6245 // Fast path: the GC is not marking: just load the reference.
6246 GenerateRawReferenceLoad(
6247 instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
6248 __ Bind(slow_path->GetExitLabel());
6249 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
6250 }
6251
6252 void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
6253 Location ref,
6254 Register obj,
6255 Location field_offset,
6256 Register temp,
6257 bool needs_null_check,
6258 bool use_load_acquire) {
6259 DCHECK(kEmitCompilerReadBarrier);
6260 DCHECK(kUseBakerReadBarrier);
6261 // If we are emitting an array load, we should not be using a
6262 // Load Acquire instruction. In other words:
6263 // `instruction->IsArrayGet()` => `!use_load_acquire`.
6264 DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
6265
6266 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6267 // Marking Register) to decide whether we need to enter the slow
6268 // path to update the reference field within `obj`. Then, in the
6269 // slow path, check the gray bit in the lock word of the reference's
6270 // holder (`obj`) to decide whether to mark `ref` and update the
6271 // field or not.
6272 //
6273 // if (mr) { // Thread::Current()->GetIsGcMarking()
6274 // // Slow path.
6275 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6276 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6277 // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
6278 // bool is_gray = (rb_state == ReadBarrier::GrayState());
6279 // if (is_gray) {
6280 // old_ref = ref;
6281 // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6282 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
6283 // compareAndSwapObject(obj, field_offset, old_ref, ref);
6284 // }
6285 // }
6286
6287 // Slow path updating the object reference at address `obj + field_offset`
6288 // when the GC is marking. The entrypoint will be loaded by the slow path code.
6289 SlowPathCodeARM64* slow_path =
6290 new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
6291 instruction,
6292 ref,
6293 obj,
6294 /* offset */ 0u,
6295 /* index */ field_offset,
6296 /* scale_factor */ 0u /* "times 1" */,
6297 needs_null_check,
6298 use_load_acquire,
6299 temp);
6300 AddSlowPath(slow_path);
6301
6302 __ Cbnz(mr, slow_path->GetEntryLabel());
6303 // Fast path: the GC is not marking: nothing to do (the field is
6304 // up-to-date, and we don't need to load the reference).
6305 __ Bind(slow_path->GetExitLabel());
6306 MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
6307 }
6308
6309 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
6310 Location ref,
6311 Register obj,
6312 uint32_t offset,
6313 Location index,
6314 size_t scale_factor,
6315 bool needs_null_check,
6316 bool use_load_acquire) {
6317 DCHECK(obj.IsW());
6318 DataType::Type type = DataType::Type::kReference;
6319 Register ref_reg = RegisterFrom(ref, type);
6320
6321 // If needed, vixl::EmissionCheckScope guards are used to ensure
6322 // that no pools are emitted between the load (macro) instruction
6323 // and MaybeRecordImplicitNullCheck.
6324
6325 if (index.IsValid()) {
6326 // Load types involving an "index": ArrayGet,
6327 // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
6328 // intrinsics.
6329 if (use_load_acquire) {
6330 // UnsafeGetObjectVolatile intrinsic case.
6331 // Register `index` is not an index in an object array, but an
6332 // offset to an object reference field within object `obj`.
6333 DCHECK(instruction->IsInvoke()) << instruction->DebugName();
6334 DCHECK(instruction->GetLocations()->Intrinsified());
6335 DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
6336 << instruction->AsInvoke()->GetIntrinsic();
6337 DCHECK_EQ(offset, 0u);
6338 DCHECK_EQ(scale_factor, 0u);
6339 DCHECK_EQ(needs_null_check, false);
6340 // /* HeapReference<mirror::Object> */ ref = *(obj + index)
6341 MemOperand field = HeapOperand(obj, XRegisterFrom(index));
6342 LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
6343 } else {
6344 // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
6345 // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
6346 if (index.IsConstant()) {
6347 uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
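// For example, an ArrayGet of a reference at constant index 3 yields
// computed_offset = data_offset + (3 << 2), as compressed heap references are 4 bytes wide.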
6348 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6349 Load(type, ref_reg, HeapOperand(obj, computed_offset));
6350 if (needs_null_check) {
6351 MaybeRecordImplicitNullCheck(instruction);
6352 }
6353 } else {
6354 UseScratchRegisterScope temps(GetVIXLAssembler());
6355 Register temp = temps.AcquireW();
6356 __ Add(temp, obj, offset);
6357 {
6358 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6359 Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
6360 if (needs_null_check) {
6361 MaybeRecordImplicitNullCheck(instruction);
6362 }
6363 }
6364 }
6365 }
6366 } else {
6367 // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
6368 MemOperand field = HeapOperand(obj, offset);
6369 if (use_load_acquire) {
6370 // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
6371 LoadAcquire(instruction, ref_reg, field, needs_null_check);
6372 } else {
6373 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6374 Load(type, ref_reg, field);
6375 if (needs_null_check) {
6376 MaybeRecordImplicitNullCheck(instruction);
6377 }
6378 }
6379 }
6380
6381 // Object* ref = ref_addr->AsMirrorPtr()
6382 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6383 }
6384
6385 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6386 // The following condition is a compile-time one, so it does not have a run-time cost.
6387 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
6388 // The following condition is a run-time one; it is executed after the
6389 // previous compile-time test, to avoid penalizing non-debug builds.
6390 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6391 UseScratchRegisterScope temps(GetVIXLAssembler());
6392 Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6393 GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6394 }
6395 }
6396 }
6397
6398 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6399 Location out,
6400 Location ref,
6401 Location obj,
6402 uint32_t offset,
6403 Location index) {
6404 DCHECK(kEmitCompilerReadBarrier);
6405
6406 // Insert a slow path based read barrier *after* the reference load.
6407 //
6408 // If heap poisoning is enabled, the unpoisoning of the loaded
6409 // reference will be carried out by the runtime within the slow
6410 // path.
6411 //
6412 // Note that `ref` currently does not get unpoisoned (when heap
6413 // poisoning is enabled), which is alright as the `ref` argument is
6414 // not used by the artReadBarrierSlow entry point.
6415 //
6416 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6417 SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6418 ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6419 AddSlowPath(slow_path);
6420
6421 __ B(slow_path->GetEntryLabel());
6422 __ Bind(slow_path->GetExitLabel());
6423 }
6424
6425 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6426 Location out,
6427 Location ref,
6428 Location obj,
6429 uint32_t offset,
6430 Location index) {
6431 if (kEmitCompilerReadBarrier) {
6432 // Baker's read barriers shall be handled by the fast path
6433 // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6434 DCHECK(!kUseBakerReadBarrier);
6435 // If heap poisoning is enabled, unpoisoning will be taken care of
6436 // by the runtime within the slow path.
6437 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6438 } else if (kPoisonHeapReferences) {
6439 GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6440 }
6441 }
6442
6443 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6444 Location out,
6445 Location root) {
6446 DCHECK(kEmitCompilerReadBarrier);
6447
6448 // Insert a slow path based read barrier *after* the GC root load.
6449 //
6450 // Note that GC roots are not affected by heap poisoning, so we do
6451 // not need to do anything special for this here.
6452 SlowPathCodeARM64* slow_path =
6453 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6454 AddSlowPath(slow_path);
6455
6456 __ B(slow_path->GetEntryLabel());
6457 __ Bind(slow_path->GetExitLabel());
6458 }
6459
6460 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6461 LocationSummary* locations =
6462 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6463 locations->SetInAt(0, Location::RequiresRegister());
6464 locations->SetOut(Location::RequiresRegister());
6465 }
6466
6467 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6468 LocationSummary* locations = instruction->GetLocations();
6469 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6470 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6471 instruction->GetIndex(), kArm64PointerSize).SizeValue();
6472 __ Ldr(XRegisterFrom(locations->Out()),
6473 MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6474 } else {
6475 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6476 instruction->GetIndex(), kArm64PointerSize));
6477 __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6478 mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6479 __ Ldr(XRegisterFrom(locations->Out()),
6480 MemOperand(XRegisterFrom(locations->Out()), method_offset));
6481 }
6482 }
6483
6484 static void PatchJitRootUse(uint8_t* code,
6485 const uint8_t* roots_data,
6486 vixl::aarch64::Literal<uint32_t>* literal,
6487 uint64_t index_in_table) {
6488 uint32_t literal_offset = literal->GetOffset();
6489 uintptr_t address =
6490 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6491 uint8_t* data = code + literal_offset;
6492 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6493 }
6494
6495 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6496 for (const auto& entry : jit_string_patches_) {
6497 const StringReference& string_reference = entry.first;
6498 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6499 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
6500 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6501 }
6502 for (const auto& entry : jit_class_patches_) {
6503 const TypeReference& type_reference = entry.first;
6504 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6505 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
6506 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6507 }
6508 }
6509
6510 #undef __
6511 #undef QUICK_ENTRY_POINT
6512
6513 } // namespace arm64
6514 } // namespace art
6515