/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_

#include "base/bit_field.h"
#include "code_generator.h"
#include "common_arm64.h"
#include "dex/dex_file_types.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace linker {
class Arm64RelativePatcherTest;
}  // namespace linker

namespace arm64 {

class CodeGeneratorARM64;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);

// These constants are used as an approximate margin when emission of veneer and literal pools
// must be blocked.
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
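// For a rough sense of scale (an informal note, assuming VIXL's 4-byte A64 instruction size):
// each macro-instruction is budgeted at 15 * 4 = 60 bytes, so the invoke margin corresponds to
// roughly 6 * 60 = 360 bytes of code emitted while pools are blocked.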

static const vixl::aarch64::Register kParameterCoreRegisters[] = {
  vixl::aarch64::x1,
  vixl::aarch64::x2,
  vixl::aarch64::x3,
  vixl::aarch64::x4,
  vixl::aarch64::x5,
  vixl::aarch64::x6,
  vixl::aarch64::x7
};
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static const vixl::aarch64::VRegister kParameterFPRegisters[] = {
  vixl::aarch64::d0,
  vixl::aarch64::d1,
  vixl::aarch64::d2,
  vixl::aarch64::d3,
  vixl::aarch64::d4,
  vixl::aarch64::d5,
  vixl::aarch64::d6,
  vixl::aarch64::d7
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);

// Thread Register.
const vixl::aarch64::Register tr = vixl::aarch64::x19;
// Marking Register.
const vixl::aarch64::Register mr = vixl::aarch64::x20;
// Method register on invoke.
static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
                                                             vixl::aarch64::ip1);
const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);

const vixl::aarch64::CPURegList runtime_reserved_core_registers =
    vixl::aarch64::CPURegList(
        tr,
        // Reserve X20 as Marking Register when emitting Baker read barriers.
        ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
        vixl::aarch64::lr);

// Some instructions have special requirements for a temporary. For example,
// LoadClass/kBssEntry and LoadString/kBssEntry with Baker read barriers require
// a temp that is not R0 (to avoid an extra move), and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that is neither callee-save nor an argument register. We choose x15.
inline Location FixedTempLocation() {
  return Location::RegisterLocation(vixl::aarch64::x15.GetCode());
}

// Callee-saved registers defined by AAPCS64, excluding x19 (Thread Register)
// and, when emitting Baker read barriers, x20 (Marking Register).
const vixl::aarch64::CPURegList callee_saved_core_registers(
    vixl::aarch64::CPURegister::kRegister,
    vixl::aarch64::kXRegSize,
    ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
         ? vixl::aarch64::x21.GetCode()
         : vixl::aarch64::x20.GetCode()),
    vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
                                                          vixl::aarch64::kDRegSize,
                                                          vixl::aarch64::d8.GetCode(),
                                                          vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(DataType::Type return_type);

class SlowPathCodeARM64 : public SlowPathCode {
 public:
  explicit SlowPathCodeARM64(HInstruction* instruction)
      : SlowPathCode(instruction), entry_label_(), exit_label_() {}

  vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
  vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;

 private:
  vixl::aarch64::Label entry_label_;
  vixl::aarch64::Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
};

class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
 public:
  explicit JumpTableARM64(HPackedSwitch* switch_instr)
      : switch_instr_(switch_instr), table_start_() {}

  vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }

  void EmitTable(CodeGeneratorARM64* codegen);

 private:
  HPackedSwitch* const switch_instr_;
  vixl::aarch64::Label table_start_;

  DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};

static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
    { vixl::aarch64::x0,
      vixl::aarch64::x1,
      vixl::aarch64::x2,
      vixl::aarch64::x3,
      vixl::aarch64::x4,
      vixl::aarch64::x5,
      vixl::aarch64::x6,
      vixl::aarch64::x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] =
    { vixl::aarch64::d0,
      vixl::aarch64::d1,
      vixl::aarch64::d2,
      vixl::aarch64::d3,
      vixl::aarch64::d4,
      vixl::aarch64::d5,
      vixl::aarch64::d6,
      vixl::aarch64::d7 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
                                                                vixl::aarch64::VRegister> {
 public:
  static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);

  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kArm64PointerSize) {}

  Location GetReturnLocation(DataType::Type return_type);

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
                                                            vixl::aarch64::VRegister> {
 public:
  InvokeDexCallingConvention()
      : CallingConvention(kParameterCoreRegisters,
                          kParameterCoreRegistersLength,
                          kParameterFPRegisters,
                          kParameterFPRegistersLength,
                          kArm64PointerSize) {}

  Location GetReturnLocation(DataType::Type return_type) const {
    return ARM64ReturnLocation(return_type);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorARM64() {}
  virtual ~InvokeDexCallingConventionVisitorARM64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type return_type) const override {
    return calling_convention.GetReturnLocation(return_type);
  }
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
};

class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionARM64() {}

  Location GetObjectLocation() const override {
    return helpers::LocationFrom(vixl::aarch64::x1);
  }
  Location GetFieldIndexLocation() const override {
    return helpers::LocationFrom(vixl::aarch64::x0);
  }
  Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return helpers::LocationFrom(vixl::aarch64::x0);
  }
  Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
                               bool is_instance) const override {
    return is_instance
        ? helpers::LocationFrom(vixl::aarch64::x2)
        : helpers::LocationFrom(vixl::aarch64::x1);
  }
  Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return helpers::LocationFrom(vixl::aarch64::d0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
};

class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  Arm64Assembler* GetAssembler() const { return assembler_; }
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }

 private:
  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                        vixl::aarch64::Register class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
                                         vixl::aarch64::Register temp);
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void HandleBinaryOp(HBinaryOperation* instr);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleCondition(HCondition* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         Location maybe_temp,
                                         ReadBarrierOption read_barrier_option);

  // Generate a floating-point comparison.
  void GenerateFcmp(HInstruction* instruction);

  void HandleShift(HBinaryOperation* instr);
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             vixl::aarch64::Label* true_target,
                             vixl::aarch64::Label* false_target);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateIntDiv(HDiv* instruction);
  void GenerateIntDivForConstDenom(HDiv* instruction);
  void GenerateIntDivForPower2Denom(HDiv* instruction);
  void GenerateIntRem(HRem* instruction);
  void GenerateIntRemForConstDenom(HRem* instruction);
  void GenerateIntRemForPower2Denom(HRem* instruction);
  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  vixl::aarch64::MemOperand VecAddress(
      HVecMemoryOperation* instruction,
      // This function may acquire a scratch register.
      vixl::aarch64::UseScratchRegisterScope* temps_scope,
      size_t size,
      bool is_string_char_at,
      /*out*/ vixl::aarch64::Register* scratch);

  Arm64Assembler* const assembler_;
  CodeGeneratorARM64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARM64);
};

class LocationsBuilderARM64 : public HGraphVisitor {
 public:
  LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBinaryOp(HBinaryOperation* instr);
  void HandleFieldSet(HInstruction* instruction);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleInvoke(HInvoke* instr);
  void HandleCondition(HCondition* instruction);
  void HandleShift(HBinaryOperation* instr);

  CodeGeneratorARM64* const codegen_;
  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};

class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
 public:
  ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
      : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}

 protected:
  void PrepareForEmitNativeCode() override;
  void FinishEmitNativeCode() override;
  Location AllocateScratchLocationFor(Location::Kind kind) override;
  void FreeScratchLocation(Location loc) override;
  void EmitMove(size_t index) override;

 private:
  Arm64Assembler* GetAssembler() const;
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
    return GetAssembler()->GetVIXLAssembler();
  }

  CodeGeneratorARM64* const codegen_;
  vixl::aarch64::UseScratchRegisterScope vixl_temps_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
};

class CodeGeneratorARM64 : public CodeGenerator {
 public:
  CodeGeneratorARM64(HGraph* graph,
                     const CompilerOptions& compiler_options,
                     OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorARM64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;

  vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
  vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;

  void Bind(HBasicBlock* block) override;

  vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
    block = FirstNonEmptyBlock(block);
    return &(block_labels_[block->GetBlockId()]);
  }

  size_t GetWordSize() const override {
    return kArm64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? vixl::aarch64::kQRegSizeInBytes
        : vixl::aarch64::kDRegSizeInBytes;
  }

  size_t GetCalleePreservedFPWidth() const override {
    return vixl::aarch64::kDRegSizeInBytes;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
    DCHECK(block_entry_label->IsBound());
    return block_entry_label->GetLocation();
  }

  HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
  HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
  Arm64Assembler* GetAssembler() override { return &assembler_; }
  const Arm64Assembler& GetAssembler() const override { return assembler_; }
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }

  // Emit a write barrier.
  void MarkGCCard(vixl::aarch64::Register object,
                  vixl::aarch64::Register value,
                  bool value_can_be_null);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Register allocation.

  void SetupBlockedRegisters() const override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // The number of registers that can be allocated. The register allocator may
  // decide to reserve and not use a few of them.
  // We do not consider registers sp, xzr, wzr. They are either not allocatable
  // (xzr, wzr), or make for poor allocatable registers (sp alignment
  // requirements, etc.). This also facilitates our task as all other registers
  // can easily be mapped to or from their type and index or code.
  static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
  static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfVRegisters;
  static constexpr int kNumberOfAllocatableRegisterPairs = 0;

  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kArm64;
  }

  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const;

  void Initialize() override {
    block_labels_.resize(GetGraph()->GetBlocks().size());
  }

  // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
  // These instructions can only encode offsets that are multiples of the register size accessed.
  uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; }
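  // For example (illustrative): `stp x0, x1, [sp, #16]` is encodable because the STP/LDP
  // immediate is scaled by the 8-byte X register size, while an offset such as #12 is not;
  // hence the 8-byte preferred slot alignment above.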

  JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
    jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr));
    return jump_tables_.back().get();
  }

  void Finalize(CodeAllocator* allocator) override;

  // Code generation helpers.
  void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  void Load(DataType::Type type,
            vixl::aarch64::CPURegister dst,
            const vixl::aarch64::MemOperand& src);
  void Store(DataType::Type type,
             vixl::aarch64::CPURegister src,
             const vixl::aarch64::MemOperand& dst);
  void LoadAcquire(HInstruction* instruction,
                   vixl::aarch64::CPURegister dst,
                   const vixl::aarch64::MemOperand& src,
                   bool needs_null_check);
  void StoreRelease(HInstruction* instruction,
                    DataType::Type type,
                    vixl::aarch64::CPURegister src,
                    const vixl::aarch64::MemOperand& dst,
                    bool needs_null_check);

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }

  bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return false;
  }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                              DataType::Type type ATTRIBUTE_UNUSED) override {
    UNIMPLEMENTED(FATAL);
  }

  // Add a new boot image intrinsic patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
                                                   vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image relocation patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image method patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method,
                                                vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry method patch for an instruction and return
  // the label to be bound before the instruction. The instruction will be
  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
  // pointing to the associated ADRP patch label).
  vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image type patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file,
                                              dex::TypeIndex type_index,
                                              vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry type patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBssEntryTypePatch(const DexFile& dex_file,
                                             dex::TypeIndex type_index,
                                             vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image string patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file,
                                                dex::StringIndex string_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry string patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file,
                                               dex::StringIndex string_index,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT.
  void EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset);

  // Emit the CBNZ instruction for Baker read barrier and record
  // the associated patch for AOT or slow path for JIT.
  void EmitBakerReadBarrierCbnz(uint32_t custom_data);

  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
  vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                                dex::StringIndex string_index,
                                                                Handle<mirror::String> handle);
  vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
                                                               dex::TypeIndex type_index,
                                                               Handle<mirror::Class> handle);

  void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
  void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
                          vixl::aarch64::Register out,
                          vixl::aarch64::Register base);
  void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
                                vixl::aarch64::Register out,
                                vixl::aarch64::Register base);
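  // Illustrative pairing of the patch helpers above (a hedged sketch with placeholder names
  // `target_method`, `temp` and `out`, not verbatim compiler code): a PC-relative .bss entry
  // load is built from an ADRP whose label comes from the first call and an LDR whose label
  // comes from the second call that receives `adrp_label`:
  //
  //   vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
  //   EmitAdrpPlaceholder(adrp_label, temp);            // adrp temp, <.bss entry page>
  //   vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label);
  //   EmitLdrOffsetPlaceholder(ldr_label, out, temp);   // ldr out, [temp, <page offset>]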

  void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference);
  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
  bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
  void EmitThunkCode(const linker::LinkerPatch& patch,
                     /*out*/ ArenaVector<uint8_t>* code,
                     /*out*/ std::string* debug_name) override;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Generate a GC root reference load:
  //
  //   root <- *(obj + offset)
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               vixl::aarch64::Register obj,
                               uint32_t offset,
                               vixl::aarch64::Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier.
  void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked,
                                                        vixl::aarch64::Register old_value);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             const vixl::aarch64::MemOperand& src,
                                             bool needs_null_check,
                                             bool use_load_acquire);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             uint32_t offset,
                                             Location maybe_temp,
                                             bool needs_null_check,
                                             bool use_load_acquire);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);

  // Emit code checking the status of the Marking Register, and
  // aborting the program if MR does not match the value stored in the
  // art::Thread object. Code is only emitted in debug mode and if
  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
  //
  // Argument `code` is used to identify the different occurrences of
  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
  // passed to the BRK instruction.
  //
  // If `temp_loc` is a valid location, it is expected to be a
  // register and will be used as a temporary to generate code;
  // otherwise, a temporary will be fetched from the core register
  // scratch pool.
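  //
  // Informal sketch of the emitted check (an assumption based on the description above,
  // not a guaranteed instruction sequence), with `wTemp` as the temporary:
  //
  //   ldr  wTemp, [tr, #<Thread::IsGcMarkingOffset>]
  //   cmp  w20, wTemp        // w20 is the Marking Register (mr).
  //   b.eq mr_is_ok
  //   brk  #code
  //  mr_is_ok: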
  virtual void MaybeGenerateMarkingRegisterCheck(int code,
                                                 Location temp_loc = Location::NoLocation());

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  void GenerateNop() override;

  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;

  void MaybeRecordImplicitNullCheck(HInstruction* instr) final {
    // The function must only be called within special scopes
    // (EmissionCheckScope, ExactAssemblyScope) which prevent generation of
    // veneer/literal pools by the VIXL assembler.
    CHECK_EQ(GetVIXLAssembler()->ArePoolsBlocked(), true)
        << "The function must only be called within EmissionCheckScope or ExactAssemblyScope";
    CodeGenerator::MaybeRecordImplicitNullCheck(instr);
  }

  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
  void MaybeIncrementHotness(bool is_frame_entry);

 private:
  // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.

  enum class BakerReadBarrierKind : uint8_t {
    kField,    // Field get or array get with constant offset (i.e. constant index).
    kAcquire,  // Volatile field get.
    kArray,    // Array get with index in register.
    kGcRoot,   // GC root load.
    kLast = kGcRoot
  };

  static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;

  static constexpr size_t kBitsForBakerReadBarrierKind =
      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
  static constexpr size_t kBakerReadBarrierBitsForRegister =
      MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
  using BakerReadBarrierKindField =
      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
  using BakerReadBarrierFirstRegField =
      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
  using BakerReadBarrierSecondRegField =
      BitField<uint32_t,
               kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
               kBakerReadBarrierBitsForRegister>;
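
  // Layout illustration for the encoded data (an informal note; the exact widths follow the
  // MinimumBitsToStore() values above): with four kinds and register codes 0-31, the kind
  // occupies bits [0, 2), the first register bits [2, 7) and the second register bits [7, 12).
  // For example, EncodeBakerReadBarrierFieldData(/* base_reg= */ 1, /* holder_reg= */ 2) below
  // yields 0 | (1 << 2) | (2 << 7) = 260.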

  static void CheckValidReg(uint32_t reg) {
    DCHECK(reg < vixl::aarch64::lr.GetCode() &&
           reg != vixl::aarch64::ip0.GetCode() &&
           reg != vixl::aarch64::ip1.GetCode()) << reg;
  }

  static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
    CheckValidReg(base_reg);
    CheckValidReg(holder_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(holder_reg);
  }

  static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg,
                                                           uint32_t holder_reg) {
    CheckValidReg(base_reg);
    CheckValidReg(holder_reg);
    DCHECK_NE(base_reg, holder_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(holder_reg);
  }

  static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
    CheckValidReg(base_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
  }

  static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
    CheckValidReg(root_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
           BakerReadBarrierFirstRegField::Encode(root_reg) |
           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
  }

  void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
                                    uint32_t encoded_data,
                                    /*out*/ std::string* debug_name);

  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
  using StringToLiteralMap = ArenaSafeMap<StringReference,
                                          vixl::aarch64::Literal<uint32_t>*,
                                          StringReferenceValueComparator>;
  using TypeToLiteralMap = ArenaSafeMap<TypeReference,
                                        vixl::aarch64::Literal<uint32_t>*,
                                        TypeReferenceValueComparator>;

  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
  vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);

  // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
  // whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
  struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> {
    PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
        : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { }

    vixl::aarch64::Label* pc_insn_label;
  };

  struct BakerReadBarrierPatchInfo {
    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }

    vixl::aarch64::Label label;
    uint32_t custom_data;
  };

  vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file,
                                           uint32_t offset_or_index,
                                           vixl::aarch64::Label* adrp_label,
                                           ArenaDeque<PcRelativePatchInfo>* patches);

  void EmitJumpTables();

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
  ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
  vixl::aarch64::Label frame_entry_label_;
  ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;

  LocationsBuilderARM64 location_builder_;
  InstructionCodeGeneratorARM64 instruction_visitor_;
  ParallelMoveResolverARM64 move_resolver_;
  Arm64Assembler assembler_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_;
  // Patch info for calls to entrypoint dispatch thunks. Used for slow paths.
  ArenaDeque<PatchInfo<vixl::aarch64::Label>> call_entrypoint_patches_;
  // Baker read barrier patch info.
  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;

  // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
  Uint32ToLiteralMap uint32_literals_;
  // Deduplication map for 64-bit literals, used for JIT for method address or method code.
  Uint64ToLiteralMap uint64_literals_;
  // Patches for string literals in JIT compiled code.
  StringToLiteralMap jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  TypeToLiteralMap jit_class_patches_;

  // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
  // Wrap the label to work around vixl::aarch64::Label being non-copyable
  // and non-moveable and as such unusable in ArenaSafeMap<>.
  struct LabelWrapper {
    LabelWrapper(const LabelWrapper& src)
        : label() {
      DCHECK(!src.label.IsLinked() && !src.label.IsBound());
    }
    LabelWrapper() = default;
    vixl::aarch64::Label label;
  };
  ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;

  friend class linker::Arm64RelativePatcherTest;
  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};

inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const {
  return codegen_->GetAssembler();
}

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_