/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm_vixl.h"

#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm/jni_frame_arm.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "base/globals.h"
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

namespace art HIDDEN {
namespace arm {

namespace vixl32 = vixl::aarch32;
using namespace vixl32;  // NOLINT(build/namespaces)

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegister;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::Uint64ConstantFrom;

using vixl::EmissionCheckScope;
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

using RegisterList = vixl32::RegisterList;

static bool ExpectedPairLayout(Location location) {
  // We expect this layout for both core and FPU register pairs.
  return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// A reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;

// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;

#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()

// Marker for code that is yet to be, and must be, implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "

static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
  return rt.IsLow() && rn.IsLow() && offset < 32u;
}

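// Helper that emits a wide (32-bit) ADR to load a code address into `rd`. The plain ADR emitted
// by the assembler yields an even address; once the label is bound, the destructor patches the
// encoding so the loaded address has the Thumb mode bit set.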
class EmitAdrCode {
 public:
  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
      : assembler_(assembler), rd_(rd), label_(label) {
    DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
    adr_location_ = assembler->GetCursorOffset();
    assembler->adr(EncodingSize(Wide), rd, label);
  }

  ~EmitAdrCode() {
    DCHECK(label_->IsBound());
    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
    // TODO: Maybe extend VIXL to allow ADR for return address?
    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
    // Add the Thumb mode bit.
    raw_adr[2] |= 0x01u;
  }

 private:
  ArmVIXLMacroAssembler* const assembler_;
  vixl32::Register rd_;
  vixl32::Label* const label_;
  int32_t adr_location_;
};

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
// for each live D register they treat the two corresponding S registers as live.
//
// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
// from a list of contiguous S registers a list of contiguous D registers (handling the first/last
// S register corner cases) and save/restore this new list treating them as D registers. This
// - decreases code size,
// - avoids hazards on Cortex-A57, when a pair of S registers for an actual live D register is
//   restored and then used in regular non-slow-path code as a D register.
//
// For the following example (v means the S register is live):
//   D names: |    D0   |    D1   |    D2   |    D3   | ...
//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
//
// S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
// as D registers.
//
// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
// for lists of floating-point registers.
static size_t SaveContiguousSRegisterList(size_t first,
                                          size_t last,
                                          CodeGenerator* codegen,
                                          size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool save_last = false;
  if (last % 2 == 0) {
    save_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;

    if (number_of_d_regs == 1) {
      __ Vstr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (save_last) {
    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static size_t RestoreContiguousSRegisterList(size_t first,
                                             size_t last,
                                             CodeGenerator* codegen,
                                             size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool restore_last = false;
  if (last % 2 == 0) {
    restore_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;
    if (number_of_d_regs == 1) {
      __ Vldr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (restore_last) {
    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static LoadOperandType GetLoadOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kLoadWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      return kLoadUnsignedByte;
    case DataType::Type::kInt8:
      return kLoadSignedByte;
    case DataType::Type::kUint16:
      return kLoadUnsignedHalfword;
    case DataType::Type::kInt16:
      return kLoadSignedHalfword;
    case DataType::Type::kInt32:
      return kLoadWord;
    case DataType::Type::kInt64:
      return kLoadWordPair;
    case DataType::Type::kFloat32:
      return kLoadSWord;
    case DataType::Type::kFloat64:
      return kLoadDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  orig_offset = stack_offset;
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  stack_offset = orig_offset;
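  // Save each contiguous run of set bits in `fp_spills` as one S-register range: adding
  // (1u << begin) carries through the lowest run of 1s, so ANDing with the sum clears that run,
  // and the lowest set bit of the sum (or 32 if it became zero) is the first bit past the run.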
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    stack_offset += kArmWordSize;
  }

  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
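  // Restore the FP registers using the same contiguous-run extraction as in SaveLiveRegisters.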
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
};

class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
};

class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARMVIXL(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm_codegen->GetLabelOf(successor_));
    }
  }

  vixl32::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl32::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
};

class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
};

class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
      : SlowPathCodeARMVIXL(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()) ||
             arm_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};

class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(instruction_->IsLoadString());
    DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
    arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();

    arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
};

class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;

    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
};

class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));

    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
};

class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
                                             Location out,
                                             Location ref,
                                             Location obj,
                                             uint32_t offset,
                                             Location index)
      : SlowPathCodeARMVIXL(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ LoadFromOffset(kLoadWord, out, out, offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        vixl32::Register index_reg = RegisterFrom(index_);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
        if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
          // art::arm::ArmVIXLMacroAssembler::Add below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg, index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, TIMES_4);
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, offset_);
      } else {
        // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset to an object field within an
        // object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        HInvoke* invoke = instruction_->AsInvoke();
        DCHECK(IsUnsafeGetReference(invoke) ||
               IsVarHandleGet(invoke) ||
               IsVarHandleCASFamily(invoke))
            << invoke->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        // Though UnsafeGet's offset location is a register pair, we only pass the low
        // part (high part is irrelevant for 32-bit addresses) to the slow path.
        // For VarHandle intrinsics, the index is always just a register.
        DCHECK(index_.IsRegister());
        index = index_;
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ Mov(calling_convention.GetRegisterAt(2), offset_);
    }
    arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
  }

 private:
  vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    uint32_t ref = RegisterFrom(ref_).GetCode();
    uint32_t obj = RegisterFrom(obj_).GetCode();
    for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return vixl32::Register(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
    arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
};

class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    if (instruction_->IsMethodExitHook()) {
      // Load frame size to pass to the exit hooks
      __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
    }
    arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
};

class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
                                  vixl32::Register profiling_info)
      : SlowPathCodeARMVIXL(suspend_check),
        profiling_info_(profiling_info) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    uint32_t entry_point_offset =
        GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
    __ Bind(GetEntryLabel());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
    vixl32::Register tmp = temps.Acquire();
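    // Reset the baseline hotness counter to its initial optimize threshold before requesting
    // optimized compilation, so this slow path is not immediately taken again.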
    __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
    __ Strh(tmp,
            MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
    __ Ldr(lr, MemOperand(tr, entry_point_offset));
    // Note: we don't record the call here (and therefore don't generate a stack
    // map), as the entrypoint should never be suspended.
    __ Blx(lr);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  vixl32::Register profiling_info_;

  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
};

inline vixl32::Condition ARMCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps a signed condition to its unsigned counterpart.
inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    // Signed to unsigned.
    case kCondLT: return lo;
    case kCondLE: return ls;
    case kCondGT: return hi;
    case kCondGE: return hs;
    // Unsigned conditions remain unchanged.
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
  // The ARM condition codes can express all the necessary branches; see the
  // "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
  switch (op_kind) {
    case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
    case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
    case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
    default:
      LOG(FATAL) << "Unexpected op kind " << op_kind;
      UNREACHABLE();
  }
}

void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << vixl32::Register(reg);
}

void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << vixl32::SRegister(reg);
}

const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
}

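// Builds a bit mask with one bit set for each S register in the inclusive range covered by `regs`.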
static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
  uint32_t mask = 0;
  for (uint32_t i = regs.GetFirstSRegister().GetCode();
       i <= regs.GetLastSRegister().GetCode();
       ++i) {
    mask |= (1 << i);
  }
  return mask;
}

// Saves the register on the stack. Returns the size taken on the stack.
size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
                                              [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

// Restores the register from the stack. Returns the size taken on the stack.
size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
                                                 [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
                                                       [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
                                                          [[maybe_unused]] uint32_t reg_id) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
                                        vixl32::Register out,
                                        vixl32::Register first,
                                        const Operand& second,
                                        CodeGeneratorARMVIXL* codegen) {
  if (second.IsImmediate() && second.GetImmediate() == 0) {
    const Operand in = kind == HInstruction::kAnd
        ? Operand(0)
        : Operand(first);

    __ Mov(out, in);
  } else {
    switch (kind) {
      case HInstruction::kAdd:
        __ Add(out, first, second);
        break;
      case HInstruction::kAnd:
        __ And(out, first, second);
        break;
      case HInstruction::kOr:
        __ Orr(out, first, second);
        break;
      case HInstruction::kSub:
        __ Sub(out, first, second);
        break;
      case HInstruction::kXor:
        __ Eor(out, first, second);
        break;
      default:
        LOG(FATAL) << "Unexpected instruction kind: " << kind;
        UNREACHABLE();
    }
  }
}

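// Applies a 64-bit data-processing operation to register pairs. Add and sub propagate the
// carry/borrow from the low word to the high word; the other (bitwise) operations are applied
// to each word independently.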
GenerateDataProc(HInstruction::InstructionKind kind,const Location & out,const Location & first,const Operand & second_lo,const Operand & second_hi,CodeGeneratorARMVIXL * codegen)1157 static void GenerateDataProc(HInstruction::InstructionKind kind,
1158                              const Location& out,
1159                              const Location& first,
1160                              const Operand& second_lo,
1161                              const Operand& second_hi,
1162                              CodeGeneratorARMVIXL* codegen) {
1163   const vixl32::Register first_hi = HighRegisterFrom(first);
1164   const vixl32::Register first_lo = LowRegisterFrom(first);
1165   const vixl32::Register out_hi = HighRegisterFrom(out);
1166   const vixl32::Register out_lo = LowRegisterFrom(out);
1167 
1168   if (kind == HInstruction::kAdd) {
1169     __ Adds(out_lo, first_lo, second_lo);
1170     __ Adc(out_hi, first_hi, second_hi);
1171   } else if (kind == HInstruction::kSub) {
1172     __ Subs(out_lo, first_lo, second_lo);
1173     __ Sbc(out_hi, first_hi, second_hi);
1174   } else {
1175     GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1176     GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1177   }
1178 }
1179 
GetShifterOperand(vixl32::Register rm,ShiftType shift,uint32_t shift_imm)1180 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1181   return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1182 }
1183 
GenerateLongDataProc(HDataProcWithShifterOp * instruction,CodeGeneratorARMVIXL * codegen)1184 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1185                                  CodeGeneratorARMVIXL* codegen) {
1186   DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1187   DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1188 
1189   const LocationSummary* const locations = instruction->GetLocations();
1190   const uint32_t shift_value = instruction->GetShiftAmount();
1191   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1192   const Location first = locations->InAt(0);
1193   const Location second = locations->InAt(1);
1194   const Location out = locations->Out();
1195   const vixl32::Register first_hi = HighRegisterFrom(first);
1196   const vixl32::Register first_lo = LowRegisterFrom(first);
1197   const vixl32::Register out_hi = HighRegisterFrom(out);
1198   const vixl32::Register out_lo = LowRegisterFrom(out);
1199   const vixl32::Register second_hi = HighRegisterFrom(second);
1200   const vixl32::Register second_lo = LowRegisterFrom(second);
1201   const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1202 
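  // The 64-bit shift is decomposed into operations on the 32-bit halves. As an
  // illustrative example (assuming LSL with shift_value == 40): the resulting
  // high word consists solely of bits from second_lo (second_lo << 8) and the
  // resulting low word is 0, so two 32-bit data-processing instructions suffice.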
1203   if (shift_value >= 32) {
1204     if (shift == ShiftType::LSL) {
1205       GenerateDataProcInstruction(kind,
1206                                   out_hi,
1207                                   first_hi,
1208                                   Operand(second_lo, ShiftType::LSL, shift_value - 32),
1209                                   codegen);
1210       GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1211     } else if (shift == ShiftType::ASR) {
1212       GenerateDataProc(kind,
1213                        out,
1214                        first,
1215                        GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1216                        Operand(second_hi, ShiftType::ASR, 31),
1217                        codegen);
1218     } else {
1219       DCHECK_EQ(shift, ShiftType::LSR);
1220       GenerateDataProc(kind,
1221                        out,
1222                        first,
1223                        GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1224                        0,
1225                        codegen);
1226     }
1227   } else {
1228     DCHECK_GT(shift_value, 1U);
1229     DCHECK_LT(shift_value, 32U);
1230 
1231     UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1232 
1233     if (shift == ShiftType::LSL) {
1234       // We are not doing this for HInstruction::kAdd because the output will require
1235       // Location::kOutputOverlap; not applicable to other cases.
1236       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1237         GenerateDataProcInstruction(kind,
1238                                     out_hi,
1239                                     first_hi,
1240                                     Operand(second_hi, ShiftType::LSL, shift_value),
1241                                     codegen);
1242         GenerateDataProcInstruction(kind,
1243                                     out_hi,
1244                                     out_hi,
1245                                     Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1246                                     codegen);
1247         GenerateDataProcInstruction(kind,
1248                                     out_lo,
1249                                     first_lo,
1250                                     Operand(second_lo, ShiftType::LSL, shift_value),
1251                                     codegen);
1252       } else {
1253         const vixl32::Register temp = temps.Acquire();
1254 
1255         __ Lsl(temp, second_hi, shift_value);
1256         __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1257         GenerateDataProc(kind,
1258                          out,
1259                          first,
1260                          Operand(second_lo, ShiftType::LSL, shift_value),
1261                          temp,
1262                          codegen);
1263       }
1264     } else {
1265       DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1266 
1267       // We are not doing this for HInstruction::kAdd because the output will require
1268       // Location::kOutputOverlap; not applicable to other cases.
1269       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1270         GenerateDataProcInstruction(kind,
1271                                     out_lo,
1272                                     first_lo,
1273                                     Operand(second_lo, ShiftType::LSR, shift_value),
1274                                     codegen);
1275         GenerateDataProcInstruction(kind,
1276                                     out_lo,
1277                                     out_lo,
1278                                     Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1279                                     codegen);
1280         GenerateDataProcInstruction(kind,
1281                                     out_hi,
1282                                     first_hi,
1283                                     Operand(second_hi, shift, shift_value),
1284                                     codegen);
1285       } else {
1286         const vixl32::Register temp = temps.Acquire();
1287 
1288         __ Lsr(temp, second_lo, shift_value);
1289         __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1290         GenerateDataProc(kind,
1291                          out,
1292                          first,
1293                          temp,
1294                          Operand(second_hi, shift, shift_value),
1295                          codegen);
1296       }
1297     }
1298   }
1299 }
1300 
1301 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1302   const Location rhs_loc = instruction->GetLocations()->InAt(1);
1303   if (rhs_loc.IsConstant()) {
1304     // 0.0 is the only immediate that can be encoded directly in
1305     // a VCMP instruction.
1306     //
1307     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1308     // specify that in a floating-point comparison, positive zero
1309     // and negative zero are considered equal, so we can use the
1310     // literal 0.0 for both cases here.
1311     //
1312     // Note however that some methods (Float.equals, Float.compare,
1313     // Float.compareTo, Double.equals, Double.compare,
1314     // Double.compareTo, Math.max, Math.min, StrictMath.max,
1315     // StrictMath.min) consider 0.0 to be (strictly) greater than
1316     // -0.0. So if we ever translate calls to these methods into a
1317     // HCompare instruction, we must handle the -0.0 case with
1318     // care here.
1319     DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1320 
1321     const DataType::Type type = instruction->InputAt(0)->GetType();
1322 
1323     if (type == DataType::Type::kFloat32) {
1324       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1325     } else {
1326       DCHECK_EQ(type, DataType::Type::kFloat64);
1327       __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1328     }
1329   } else {
1330     __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1331   }
1332 }
1333 
1334 static int64_t AdjustConstantForCondition(int64_t value,
1335                                           IfCondition* condition,
1336                                           IfCondition* opposite) {
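  // Rewrite comparisons against +/-1 as comparisons against 0, which have
  // cheaper special cases further below. For example, the unsigned `x < 1`
  // holds iff `x == 0`, and the signed `x > -1` holds iff `x >= 0`.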
1337   if (value == 1) {
1338     if (*condition == kCondB) {
1339       value = 0;
1340       *condition = kCondEQ;
1341       *opposite = kCondNE;
1342     } else if (*condition == kCondAE) {
1343       value = 0;
1344       *condition = kCondNE;
1345       *opposite = kCondEQ;
1346     }
1347   } else if (value == -1) {
1348     if (*condition == kCondGT) {
1349       value = 0;
1350       *condition = kCondGE;
1351       *opposite = kCondLT;
1352     } else if (*condition == kCondLE) {
1353       value = 0;
1354       *condition = kCondLT;
1355       *opposite = kCondGE;
1356     }
1357   }
1358 
1359   return value;
1360 }
1361 
1362 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1363     HCondition* condition,
1364     bool invert,
1365     CodeGeneratorARMVIXL* codegen) {
1366   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1367 
1368   const LocationSummary* const locations = condition->GetLocations();
1369   IfCondition cond = condition->GetCondition();
1370   IfCondition opposite = condition->GetOppositeCondition();
1371 
1372   if (invert) {
1373     std::swap(cond, opposite);
1374   }
1375 
1376   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1377   const Location left = locations->InAt(0);
1378   const Location right = locations->InAt(1);
1379 
1380   DCHECK(right.IsConstant());
1381 
1382   const vixl32::Register left_high = HighRegisterFrom(left);
1383   const vixl32::Register left_low = LowRegisterFrom(left);
1384   int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1385   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1386 
1387   // Comparisons against 0 are common enough to deserve special attention.
1388   if (value == 0) {
1389     switch (cond) {
1390       case kCondNE:
1391       // x > 0 iff x != 0 when the comparison is unsigned.
1392       case kCondA:
1393         ret = std::make_pair(ne, eq);
1394         FALLTHROUGH_INTENDED;
1395       case kCondEQ:
1396       // x <= 0 iff x == 0 when the comparison is unsigned.
1397       case kCondBE:
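        // A 64-bit value is zero iff the OR of its halves is zero, so a single
        // flag-setting ORRS into a scratch register yields the required Z flag.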
1398         __ Orrs(temps.Acquire(), left_low, left_high);
1399         return ret;
1400       case kCondLT:
1401       case kCondGE:
1402         __ Cmp(left_high, 0);
1403         return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1404       // Trivially true or false.
1405       case kCondB:
1406         ret = std::make_pair(ne, eq);
1407         FALLTHROUGH_INTENDED;
1408       case kCondAE:
1409         __ Cmp(left_low, left_low);
1410         return ret;
1411       default:
1412         break;
1413     }
1414   }
1415 
1416   switch (cond) {
1417     case kCondEQ:
1418     case kCondNE:
1419     case kCondB:
1420     case kCondBE:
1421     case kCondA:
1422     case kCondAE: {
1423       const uint32_t value_low = Low32Bits(value);
1424       Operand operand_low(value_low);
1425 
1426       __ Cmp(left_high, High32Bits(value));
1427 
1428       // Since ARMv8 deprecates IT blocks containing anything other than a single
1429       // 16-bit instruction, we must ensure that the operands corresponding to the
1430       // least significant halves of the inputs fit into a 16-bit CMP encoding.
1431       if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1432         operand_low = Operand(temps.Acquire());
1433         __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1434       }
1435 
1436       // We use the scope because of the IT block that follows.
1437       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1438                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1439                                CodeBufferCheckScope::kExactSize);
1440 
1441       __ it(eq);
1442       __ cmp(eq, left_low, operand_low);
1443       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1444       break;
1445     }
1446     case kCondLE:
1447     case kCondGT:
1448       // Trivially true or false.
1449       if (value == std::numeric_limits<int64_t>::max()) {
1450         __ Cmp(left_low, left_low);
1451         ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1452         break;
1453       }
1454 
1455       if (cond == kCondLE) {
1456         DCHECK_EQ(opposite, kCondGT);
1457         cond = kCondLT;
1458         opposite = kCondGE;
1459       } else {
1460         DCHECK_EQ(cond, kCondGT);
1461         DCHECK_EQ(opposite, kCondLE);
1462         cond = kCondGE;
1463         opposite = kCondLT;
1464       }
1465 
1466       value++;
1467       FALLTHROUGH_INTENDED;
1468     case kCondGE:
1469     case kCondLT: {
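      // Signed 64-bit `<` / `>=` only needs the flags of the full 64-bit
      // subtraction, not its result. Illustrative sketch (the scratch register
      // receives a discarded high-word result):
      //   cmp  left_low, #Low32Bits(value)
      //   sbcs scratch, left_high, #High32Bits(value)
      // Afterwards GE/LT reflect the signed 64-bit comparison.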
1470       __ Cmp(left_low, Low32Bits(value));
1471       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1472       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1473       break;
1474     }
1475     default:
1476       LOG(FATAL) << "Unreachable";
1477       UNREACHABLE();
1478   }
1479 
1480   return ret;
1481 }
1482 
1483 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1484     HCondition* condition,
1485     bool invert,
1486     CodeGeneratorARMVIXL* codegen) {
1487   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1488 
1489   const LocationSummary* const locations = condition->GetLocations();
1490   IfCondition cond = condition->GetCondition();
1491   IfCondition opposite = condition->GetOppositeCondition();
1492 
1493   if (invert) {
1494     std::swap(cond, opposite);
1495   }
1496 
1497   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1498   Location left = locations->InAt(0);
1499   Location right = locations->InAt(1);
1500 
1501   DCHECK(right.IsRegisterPair());
1502 
1503   switch (cond) {
1504     case kCondEQ:
1505     case kCondNE:
1506     case kCondB:
1507     case kCondBE:
1508     case kCondA:
1509     case kCondAE: {
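      // For the equality-based and unsigned conditions, compare the high words
      // first and compare the low words only if the high words are equal.
      // Illustrative sketch of the emitted sequence:
      //   cmp   left_hi, right_hi
      //   it    eq
      //   cmpeq left_lo, right_lo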
1510       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1511 
1512       // We use the scope because of the IT block that follows.
1513       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1514                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1515                                CodeBufferCheckScope::kExactSize);
1516 
1517       __ it(eq);
1518       __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1519       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1520       break;
1521     }
1522     case kCondLE:
1523     case kCondGT:
1524       if (cond == kCondLE) {
1525         DCHECK_EQ(opposite, kCondGT);
1526         cond = kCondGE;
1527         opposite = kCondLT;
1528       } else {
1529         DCHECK_EQ(cond, kCondGT);
1530         DCHECK_EQ(opposite, kCondLE);
1531         cond = kCondLT;
1532         opposite = kCondGE;
1533       }
1534 
1535       std::swap(left, right);
1536       FALLTHROUGH_INTENDED;
1537     case kCondGE:
1538     case kCondLT: {
1539       UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1540 
1541       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1542       __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1543       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1544       break;
1545     }
1546     default:
1547       LOG(FATAL) << "Unreachable";
1548       UNREACHABLE();
1549   }
1550 
1551   return ret;
1552 }
1553 
1554 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1555                                                                     bool invert,
1556                                                                     CodeGeneratorARMVIXL* codegen) {
1557   const DataType::Type type = condition->GetLeft()->GetType();
1558   IfCondition cond = condition->GetCondition();
1559   IfCondition opposite = condition->GetOppositeCondition();
1560   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1561 
1562   if (invert) {
1563     std::swap(cond, opposite);
1564   }
1565 
1566   if (type == DataType::Type::kInt64) {
1567     ret = condition->GetLocations()->InAt(1).IsConstant()
1568         ? GenerateLongTestConstant(condition, invert, codegen)
1569         : GenerateLongTest(condition, invert, codegen);
1570   } else if (DataType::IsFloatingPointType(type)) {
1571     GenerateVcmp(condition, codegen);
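    // Transfer the FPSCR N/Z/C/V flags to the APSR so that the integer
    // condition codes selected below apply to the floating-point comparison.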
1572     __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1573     ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1574                          ARMFPCondition(opposite, condition->IsGtBias()));
1575   } else {
1576     DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1577     __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1578     ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1579   }
1580 
1581   return ret;
1582 }
1583 
1584 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1585   const vixl32::Register out = OutputRegister(cond);
1586   const auto condition = GenerateTest(cond, false, codegen);
1587 
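  // Materialize the condition: clear `out` without disturbing the flags set by
  // GenerateTest(), then conditionally set it to 1 (via an IT block when `out`
  // is a low register, otherwise via a branch over a MOV).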
1588   __ Mov(LeaveFlags, out, 0);
1589 
1590   if (out.IsLow()) {
1591     // We use the scope because of the IT block that follows.
1592     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1593                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1594                              CodeBufferCheckScope::kExactSize);
1595 
1596     __ it(condition.first);
1597     __ mov(condition.first, out, 1);
1598   } else {
1599     vixl32::Label done_label;
1600     vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1601 
1602     __ B(condition.second, final_label, /* is_far_target= */ false);
1603     __ Mov(out, 1);
1604 
1605     if (done_label.IsReferenced()) {
1606       __ Bind(&done_label);
1607     }
1608   }
1609 }
1610 
1611 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1612   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1613 
1614   const LocationSummary* const locations = cond->GetLocations();
1615   IfCondition condition = cond->GetCondition();
1616   const vixl32::Register out = OutputRegister(cond);
1617   const Location left = locations->InAt(0);
1618   const Location right = locations->InAt(1);
1619   vixl32::Register left_high = HighRegisterFrom(left);
1620   vixl32::Register left_low = LowRegisterFrom(left);
1621   vixl32::Register temp;
1622   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1623 
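  // For 64-bit operands, left == right iff
  // ((left_lo - right_lo) | (left_hi - right_hi)) == 0, so compute both
  // differences and OR them together below.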
1624   if (right.IsConstant()) {
1625     IfCondition opposite = cond->GetOppositeCondition();
1626     const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1627                                                      &condition,
1628                                                      &opposite);
1629     Operand right_high = High32Bits(value);
1630     Operand right_low = Low32Bits(value);
1631 
1632     // The output uses Location::kNoOutputOverlap.
1633     if (out.Is(left_high)) {
1634       std::swap(left_low, left_high);
1635       std::swap(right_low, right_high);
1636     }
1637 
1638     __ Sub(out, left_low, right_low);
1639     temp = temps.Acquire();
1640     __ Sub(temp, left_high, right_high);
1641   } else {
1642     DCHECK(right.IsRegisterPair());
1643     temp = temps.Acquire();
1644     __ Sub(temp, left_high, HighRegisterFrom(right));
1645     __ Sub(out, left_low, LowRegisterFrom(right));
1646   }
1647 
1648   // Need to check after calling AdjustConstantForCondition().
1649   DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1650 
1651   if (condition == kCondNE && out.IsLow()) {
1652     __ Orrs(out, out, temp);
1653 
1654     // We use the scope because of the IT block that follows.
1655     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1656                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1657                              CodeBufferCheckScope::kExactSize);
1658 
1659     __ it(ne);
1660     __ mov(ne, out, 1);
1661   } else {
1662     __ Orr(out, out, temp);
1663     codegen->GenerateConditionWithZero(condition, out, out, temp);
1664   }
1665 }
1666 
1667 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1668   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1669 
1670   const LocationSummary* const locations = cond->GetLocations();
1671   IfCondition condition = cond->GetCondition();
1672   const vixl32::Register out = OutputRegister(cond);
1673   const Location left = locations->InAt(0);
1674   const Location right = locations->InAt(1);
1675 
1676   if (right.IsConstant()) {
1677     IfCondition opposite = cond->GetOppositeCondition();
1678 
1679     // Comparisons against 0 are common enough to deserve special attention.
1680     if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1681       switch (condition) {
1682         case kCondNE:
1683         case kCondA:
1684           if (out.IsLow()) {
1685             // We only care if both input registers are 0 or not.
1686             __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1687 
1688             // We use the scope because of the IT block that follows.
1689             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1690                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1691                                      CodeBufferCheckScope::kExactSize);
1692 
1693             __ it(ne);
1694             __ mov(ne, out, 1);
1695             return;
1696           }
1697 
1698           FALLTHROUGH_INTENDED;
1699         case kCondEQ:
1700         case kCondBE:
1701           // We only care if both input registers are 0 or not.
1702           __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1703           codegen->GenerateConditionWithZero(condition, out, out);
1704           return;
1705         case kCondLT:
1706         case kCondGE:
1707           // We only care about the sign bit.
1708           FALLTHROUGH_INTENDED;
1709         case kCondAE:
1710         case kCondB:
1711           codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1712           return;
1713         case kCondLE:
1714         case kCondGT:
1715         default:
1716           break;
1717       }
1718     }
1719   }
1720 
1721   // If `out` is a low register, then the GenerateConditionGeneric()
1722   // function generates a shorter code sequence that is still branchless.
1723   if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1724     GenerateEqualLong(cond, codegen);
1725     return;
1726   }
1727 
1728   GenerateConditionGeneric(cond, codegen);
1729 }
1730 
1731 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1732                                                     CodeGeneratorARMVIXL* codegen) {
1733   const DataType::Type type = cond->GetLeft()->GetType();
1734 
1735   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1736 
1737   if (type == DataType::Type::kInt64) {
1738     GenerateConditionLong(cond, codegen);
1739     return;
1740   }
1741 
1742   IfCondition condition = cond->GetCondition();
1743   vixl32::Register in = InputRegisterAt(cond, 0);
1744   const vixl32::Register out = OutputRegister(cond);
1745   const Location right = cond->GetLocations()->InAt(1);
1746   int64_t value;
1747 
1748   if (right.IsConstant()) {
1749     IfCondition opposite = cond->GetOppositeCondition();
1750 
1751     value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1752 
1753     // Comparisons against 0 are common enough to deserve special attention.
1754     if (value == 0) {
1755       switch (condition) {
1756         case kCondNE:
1757         case kCondA:
1758           if (out.IsLow() && out.Is(in)) {
1759             __ Cmp(out, 0);
1760 
1761             // We use the scope because of the IT block that follows.
1762             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1763                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1764                                      CodeBufferCheckScope::kExactSize);
1765 
1766             __ it(ne);
1767             __ mov(ne, out, 1);
1768             return;
1769           }
1770 
1771           FALLTHROUGH_INTENDED;
1772         case kCondEQ:
1773         case kCondBE:
1774         case kCondLT:
1775         case kCondGE:
1776         case kCondAE:
1777         case kCondB:
1778           codegen->GenerateConditionWithZero(condition, out, in);
1779           return;
1780         case kCondLE:
1781         case kCondGT:
1782         default:
1783           break;
1784       }
1785     }
1786   }
1787 
1788   if (condition == kCondEQ || condition == kCondNE) {
1789     Operand operand(0);
1790 
1791     if (right.IsConstant()) {
1792       operand = Operand::From(value);
1793     } else if (out.Is(RegisterFrom(right))) {
1794       // Avoid 32-bit instructions if possible.
1795       operand = InputOperandAt(cond, 0);
1796       in = RegisterFrom(right);
1797     } else {
1798       operand = InputOperandAt(cond, 1);
1799     }
1800 
1801     if (condition == kCondNE && out.IsLow()) {
1802       __ Subs(out, in, operand);
1803 
1804       // We use the scope because of the IT block that follows.
1805       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1806                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1807                                CodeBufferCheckScope::kExactSize);
1808 
1809       __ it(ne);
1810       __ mov(ne, out, 1);
1811     } else {
1812       __ Sub(out, in, operand);
1813       codegen->GenerateConditionWithZero(condition, out, out);
1814     }
1815 
1816     return;
1817   }
1818 
1819   GenerateConditionGeneric(cond, codegen);
1820 }
1821 
1822 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1823   const DataType::Type type = constant->GetType();
1824   bool ret = false;
1825 
1826   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1827 
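  // 8 bits is the immediate range of the 16-bit Thumb MOV (encoding T1), so
  // such constants can be moved conditionally inside a single 16-bit IT block
  // (see CanGenerateConditionalMove() below).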
1828   if (type == DataType::Type::kInt64) {
1829     const uint64_t value = Uint64ConstantFrom(constant);
1830 
1831     ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1832   } else {
1833     ret = IsUint<8>(Int32ConstantFrom(constant));
1834   }
1835 
1836   return ret;
1837 }
1838 
1839 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1840   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1841 
1842   if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1843     return Location::ConstantLocation(constant);
1844   }
1845 
1846   return Location::RequiresRegister();
1847 }
1848 
1849 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1850   // Since ARMv8 deprecates IT blocks containing anything other than a single
1851   // 16-bit instruction, we check that we are not dealing with a floating-point
1852   // output (there is no 16-bit VMOV encoding).
1853   if (!out.IsRegister() && !out.IsRegisterPair()) {
1854     return false;
1855   }
1856 
1857   // For constants, we also check that the output is in one or two low registers,
1858   // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1859   // MOV encoding can be used.
1860   if (src.IsConstant()) {
1861     if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1862       return false;
1863     }
1864 
1865     if (out.IsRegister()) {
1866       if (!RegisterFrom(out).IsLow()) {
1867         return false;
1868       }
1869     } else {
1870       DCHECK(out.IsRegisterPair());
1871 
1872       if (!HighRegisterFrom(out).IsLow()) {
1873         return false;
1874       }
1875     }
1876   }
1877 
1878   return true;
1879 }
1880 
1881 #undef __
1882 
1883 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1884                                                    vixl32::Label* final_label) {
1885   DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1886   DCHECK_IMPLIES(instruction->IsInvoke(), !instruction->GetLocations()->CanCall());
1887 
1888   const HBasicBlock* const block = instruction->GetBlock();
1889   const HLoopInformation* const info = block->GetLoopInformation();
1890   HInstruction* const next = instruction->GetNext();
1891 
1892   // Avoid a branch to a branch.
1893   if (next->IsGoto() && (info == nullptr ||
1894                          !info->IsBackEdge(*block) ||
1895                          !info->HasSuspendCheck())) {
1896     final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1897   }
1898 
1899   return final_label;
1900 }
1901 
1902 namespace detail {
1903 
1904 // Mark which intrinsics we don't have handcrafted code for.
1905 template <Intrinsics T>
1906 struct IsUnimplemented {
1907   bool is_unimplemented = false;
1908 };
1909 
1910 #define TRUE_OVERRIDE(Name)                     \
1911   template <>                                   \
1912   struct IsUnimplemented<Intrinsics::k##Name> { \
1913     bool is_unimplemented = true;               \
1914   };
1915 UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
1916 #undef TRUE_OVERRIDE
1917 
1918 static constexpr bool kIsIntrinsicUnimplemented[] = {
1919     false,  // kNone
1920 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1921     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1922     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1923 #undef IS_UNIMPLEMENTED
1924 };
1925 
1926 }  // namespace detail
1927 
1928 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1929                                            const CompilerOptions& compiler_options,
1930                                            OptimizingCompilerStats* stats)
1931     : CodeGenerator(graph,
1932                     kNumberOfCoreRegisters,
1933                     kNumberOfSRegisters,
1934                     kNumberOfRegisterPairs,
1935                     kCoreCalleeSaves.GetList(),
1936                     ComputeSRegisterListMask(kFpuCalleeSaves),
1937                     compiler_options,
1938                     stats,
1939                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1940       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1941       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1942       location_builder_(graph, this),
1943       instruction_visitor_(graph, this),
1944       move_resolver_(graph->GetAllocator(), this),
1945       assembler_(graph->GetAllocator()),
1946       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1947       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1948       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1949       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1950       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1951       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1952       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1953       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1954       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1955       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1956       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1957       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1958       uint32_literals_(std::less<uint32_t>(),
1959                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1960       jit_string_patches_(StringReferenceValueComparator(),
1961                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1962       jit_class_patches_(TypeReferenceValueComparator(),
1963                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1964       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1965                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1966   // Always save the LR register to mimic Quick.
1967   AddAllocatedRegister(Location::RegisterLocation(LR));
1968   // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
1969   // S0-S31, which alias to D0-D15.
1970   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1971   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1972 }
1973 
1974 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1975   uint32_t num_entries = switch_instr_->GetNumEntries();
1976   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1977 
1978   // We are about to use the assembler to place literals directly. Make sure we have enough
1979   // underlying code buffer and we have generated a jump table of the right size, using
1980   // codegen->GetVIXLAssembler()->GetBuffer().Align();
1981   ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1982                          num_entries * sizeof(int32_t),
1983                          CodeBufferCheckScope::kMaximumSize);
1984   // TODO(VIXL): Check that using lower case bind is fine here.
1985   codegen->GetVIXLAssembler()->bind(&table_start_);
1986   for (uint32_t i = 0; i < num_entries; i++) {
1987     codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1988   }
1989 }
1990 
1991 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1992   uint32_t num_entries = switch_instr_->GetNumEntries();
1993   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1994 
1995   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1996   for (uint32_t i = 0; i < num_entries; i++) {
1997     vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1998     DCHECK(target_label->IsBound());
1999     int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
2000     // When doing a BX to an address in T32, the lowest bit must be set to 1 to stay in Thumb state.
2001     if (codegen->GetVIXLAssembler()->IsUsingT32()) {
2002       jump_offset++;
2003     }
2004     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
2005     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
2006 
2007     bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
2008   }
2009 }
2010 
2011 void CodeGeneratorARMVIXL::FixJumpTables() {
2012   for (auto&& jump_table : jump_tables_) {
2013     jump_table->FixTable(this);
2014   }
2015 }
2016 
2017 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
2018 
2019 void CodeGeneratorARMVIXL::Finalize() {
2020   FixJumpTables();
2021 
2022   // Emit JIT baker read barrier slow paths.
2023   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
2024   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
2025     uint32_t encoded_data = entry.first;
2026     vixl::aarch32::Label* slow_path_entry = &entry.second.label;
2027     __ Bind(slow_path_entry);
2028     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
2029   }
2030 
2031   GetAssembler()->FinalizeCode();
2032   CodeGenerator::Finalize();
2033 
2034   // Verify Baker read barrier linker patches.
2035   if (kIsDebugBuild) {
2036     ArrayRef<const uint8_t> code(GetCode());
2037     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
2038       DCHECK(info.label.IsBound());
2039       uint32_t literal_offset = info.label.GetLocation();
2040       DCHECK_ALIGNED(literal_offset, 2u);
2041 
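      // T32 code is a stream of little-endian 16-bit halfwords; a 32-bit
      // instruction stores its first halfword in the high bits, which is what
      // GetInsn32() below reconstructs.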
2042       auto GetInsn16 = [&code](uint32_t offset) {
2043         DCHECK_ALIGNED(offset, 2u);
2044         return (static_cast<uint32_t>(code[offset + 0]) << 0) +
2045                (static_cast<uint32_t>(code[offset + 1]) << 8);
2046       };
2047       auto GetInsn32 = [=](uint32_t offset) {
2048         return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
2049       };
2050 
2051       uint32_t encoded_data = info.custom_data;
2052       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
2053       // Check that the next instruction matches the expected LDR.
2054       switch (kind) {
2055         case BakerReadBarrierKind::kField: {
2056           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2057           if (width == BakerReadBarrierWidth::kWide) {
2058             DCHECK_GE(code.size() - literal_offset, 8u);
2059             uint32_t next_insn = GetInsn32(literal_offset + 4u);
2060             // LDR (immediate), encoding T3, with correct base_reg.
2061             CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2062             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2063             CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
2064           } else {
2065             DCHECK_GE(code.size() - literal_offset, 6u);
2066             uint32_t next_insn = GetInsn16(literal_offset + 4u);
2067             // LDR (immediate), encoding T1, with correct base_reg.
2068             CheckValidReg(next_insn & 0x7u);  // Check destination register.
2069             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2070             CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2071           }
2072           break;
2073         }
2074         case BakerReadBarrierKind::kArray: {
2075           DCHECK_GE(code.size() - literal_offset, 8u);
2076           uint32_t next_insn = GetInsn32(literal_offset + 4u);
2077           // LDR (register), encoding T2, with correct base_reg and a LSL #2 shift (LDR Rt, [Rn, Rm, LSL #2]).
2078           CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2079           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2080           CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2081           CheckValidReg(next_insn & 0xf);  // Check index register
2082           break;
2083         }
2084         case BakerReadBarrierKind::kGcRoot: {
2085           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2086           if (width == BakerReadBarrierWidth::kWide) {
2087             DCHECK_GE(literal_offset, 4u);
2088             uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2089             // LDR (immediate), encoding T3, with correct root_reg.
2090             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2091             CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2092           } else {
2093             DCHECK_GE(literal_offset, 2u);
2094             uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2095             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2096             // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2097             // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2098             if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2099               CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2100             }
2101           }
2102           break;
2103         }
2104         case BakerReadBarrierKind::kIntrinsicCas: {
2105           DCHECK_GE(literal_offset, 4u);
2106           uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2107           // MOV (register), encoding T3, with correct root_reg.
2108           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2109           DCHECK_GE(root_reg, 8u);  // Used only for high registers.
2110           CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2111           break;
2112         }
2113         default:
2114           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2115           UNREACHABLE();
2116       }
2117     }
2118   }
2119 }
2120 
2121 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2122   // Stack register, LR and PC are always reserved.
2123   blocked_core_registers_[SP] = true;
2124   blocked_core_registers_[LR] = true;
2125   blocked_core_registers_[PC] = true;
2126 
2127   // TODO: We don't need to reserve marking-register for userfaultfd GC. But
2128   // that would require some work in the assembler code as the right GC is
2129   // chosen at load-time and not compile time.
2130   if (kReserveMarkingRegister) {
2131     // Reserve marking register.
2132     blocked_core_registers_[MR] = true;
2133   }
2134 
2135   // Reserve thread register.
2136   blocked_core_registers_[TR] = true;
2137 
2138   // Reserve temp register.
2139   blocked_core_registers_[IP] = true;
2140 
2141   if (GetGraph()->IsDebuggable()) {
2142     // Stubs do not save callee-save floating point registers. If the graph
2143     // is debuggable, we need to deal with these registers differently. For
2144     // now, just block them.
2145     for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2146          i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2147          ++i) {
2148       blocked_fpu_registers_[i] = true;
2149     }
2150   }
2151 }
2152 
2153 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2154                                                                  CodeGeneratorARMVIXL* codegen)
2155       : InstructionCodeGenerator(graph, codegen),
2156         assembler_(codegen->GetAssembler()),
2157         codegen_(codegen) {}
2158 
2159 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2160   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2161   DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2162       << "At least the return address register must be saved";
2163   // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2164   DCHECK(GetVIXLAssembler()->IsUsingT32());
2165   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2166   // We use vpush and vpop for saving and restoring floating point registers, which take
2167   // a SRegister and the number of registers to save/restore after that SRegister. We
2168   // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2169   // but in the range.
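  // For example (illustrative): if only s16 and s19 were allocated, the mask is
  // widened to cover s16-s19 so that a single `vpush {s16-s19}`/`vpop` works.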
2170   if (fpu_spill_mask_ != 0) {
2171     uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2172     uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2173     for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2174       fpu_spill_mask_ |= (1 << i);
2175     }
2176   }
2177 }
2178 
2179 void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
2180   LocationSummary* locations = new (GetGraph()->GetAllocator())
2181       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2182   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
2183   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2184   // compute the address to store the timestamp counter.
2185   locations->AddRegisterTemps(3);
2186 }
2187 
2188 void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
2189   LocationSummary* locations = instruction->GetLocations();
2190   vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
2191   vixl32::Register value = RegisterFrom(locations->GetTemp(1));
2192   vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
2193 
2194   SlowPathCodeARMVIXL* slow_path =
2195       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
2196   codegen_->AddSlowPath(slow_path);
2197 
2198   if (instruction->IsMethodExitHook()) {
2199     // Check whether we must check if the caller needs a deoptimization. Strictly speaking, it
2200     // would suffice to check whether the CheckCallerForDeopt bit is set, but it is faster to
2201     // check whether the flag is simply non-zero. The kCHA bit isn't used in debuggable runtimes
2202     // because CHA optimization is disabled there; the other bit is used when this method itself
2203     // requires a deoptimization due to redefinition, so just checking for non-zero is safe here.
2204     GetAssembler()->LoadFromOffset(
2205         kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2206     __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
2207   }
2208 
2209   MemberOffset offset = instruction->IsMethodExitHook() ?
2210       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
2211       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
2212   uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
2213   __ Mov(addr, address + offset.Int32Value());
2214   __ Ldrb(value, MemOperand(addr, 0));
2215   __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
2216   // Check if there are any trace method entry / exit listeners. If not, continue.
2217   __ B(lt, slow_path->GetExitLabel());
2218   // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit
2219   // listeners. If so, take the slow path.
2220   __ B(gt, slow_path->GetEntryLabel());
2221 
2222   // Check if there is room in the buffer to store a new entry; if not, take the slow path.
2223   uint32_t trace_buffer_index_offset =
2224       Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value();
2225   vixl32::Register index = value;
2226   __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
2227   __ Subs(index, index, kNumEntriesForWallClock);
2228   __ B(lt, slow_path->GetEntryLabel());
2229 
2230   // Update the index in the `Thread`.
2231   __ Str(index, MemOperand(tr, trace_buffer_index_offset));
2232   // Calculate the entry address in the buffer.
2233   // addr = base_addr + sizeof(void*) * index
2234   __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
2235   __ Add(addr, addr, Operand(index, LSL, TIMES_4));
2236 
2237   // Record method pointer and trace action.
2238   __ Ldr(tmp, MemOperand(sp, 0));
2239   // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
2240   // so there is no need to set the bits; they are already 0.
2241   if (instruction->IsMethodExitHook()) {
2242     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
2243     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
2244     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
2245     __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
2246   }
2247   __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
2248 
2249   vixl32::Register tmp1 = index;
2250   // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
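  // This reads the 64-bit virtual counter (CNTVCT, per the section referenced
  // above) as the timestamp.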
2251   __ Mrrc(/* lower 32-bit */ tmp,
2252           /* higher 32-bit */ tmp1,
2253           /* coproc= */ 15,
2254           /* opc1= */ 1,
2255           /* crm= */ 14);
2256   static_assert(kHighTimestampOffsetInBytes ==
2257                 kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
2258   __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes));
2259   __ Bind(slow_path->GetExitLabel());
2260 }
2261 
2262 void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
2263   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2264   DCHECK(codegen_->RequiresCurrentMethod());
2265   GenerateMethodEntryExitHook(instruction);
2266 }
2267 
2268 void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
2269   LocationSummary* locations = new (GetGraph()->GetAllocator())
2270       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
2271   // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
2272   // compute the address to store the timestamp counter.
2273   locations->AddRegisterTemps(3);
2274 }
2275 
2276 void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
2277   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
2278   DCHECK(codegen_->RequiresCurrentMethod());
2279   GenerateMethodEntryExitHook(instruction);
2280 }
2281 
2282 void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
2283                                                  bool is_frame_entry) {
2284   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2285     UseScratchRegisterScope temps(GetVIXLAssembler());
2286     vixl32::Register temp = temps.Acquire();
2287     static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2288     if (!is_frame_entry) {
2289       __ Push(vixl32::Register(kMethodRegister));
2290       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2291       GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2292     }
2293     // Load with zero extend to clear the high bits for integer overflow check.
2294     __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2295     vixl::aarch32::Label done;
2296     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
2297     __ CompareAndBranchIfZero(temp, &done, /* is_far_target= */ false);
2298     __ Add(temp, temp, -1);
2299     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2300     __ Bind(&done);
2301     if (!is_frame_entry) {
2302       __ Pop(vixl32::Register(kMethodRegister));
2303       GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2304     }
2305   }
2306 
2307   if (GetGraph()->IsCompilingBaseline() &&
2308       GetGraph()->IsUsefulOptimizing() &&
2309       !Runtime::Current()->IsAotCompiler()) {
2310     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2311     DCHECK(info != nullptr);
2312     DCHECK(!HasEmptyFrame());
2313     uint32_t address = reinterpret_cast32<uint32_t>(info);
2314     UseScratchRegisterScope temps(GetVIXLAssembler());
2315     vixl32::Register tmp = temps.Acquire();
2316     SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
2317         suspend_check, /* profiling_info= */ lr);
2318     AddSlowPath(slow_path);
2319     __ Mov(lr, address);
2320     __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
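    // The baseline hotness counter counts down. ADDS with -1 leaves the carry
    // clear only when the counter was already zero, so the `cc` branch takes
    // the compile-optimized slow path exactly when the budget is exhausted.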
2321     __ Adds(tmp, tmp, -1);
2322     __ B(cc, slow_path->GetEntryLabel());
2323     __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2324     __ Bind(slow_path->GetExitLabel());
2325   }
2326 }
2327 
2328 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2329   bool skip_overflow_check =
2330       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2331   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2332 
2333   // Check if we need to generate the clinit check. We will jump to the
2334   // resolution stub if the class is not initialized and the executing thread is
2335   // not the thread initializing it.
2336   // We do this before constructing the frame to get the correct stack trace if
2337   // an exception is thrown.
2338   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
2339     UseScratchRegisterScope temps(GetVIXLAssembler());
2340     vixl32::Label resolution;
2341     vixl32::Label memory_barrier;
2342 
2343     // Check if we're visibly initialized.
2344 
2345     vixl32::Register temp1 = temps.Acquire();
2346     // Use r4 as other temporary register.
2347     DCHECK(!blocked_core_registers_[R4]);
2348     DCHECK(!kCoreCalleeSaves.Includes(r4));
2349     vixl32::Register temp2 = r4;
2350     for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2351       DCHECK(!reg.Is(r4));
2352     }
2353 
2354     // We don't emit a read barrier here to save on code size. We rely on the
2355     // resolution trampoline to do a suspend check before re-entering this code.
2356     __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
2357     __ Ldrb(temp2, MemOperand(temp1, kClassStatusByteOffset));
2358     __ Cmp(temp2, kShiftedVisiblyInitializedValue);
2359     __ B(cs, &frame_entry_label_);
2360 
2361     // Check if we're initialized and jump to code that does a memory barrier if
2362     // so.
2363     __ Cmp(temp2, kShiftedInitializedValue);
2364     __ B(cs, &memory_barrier);
2365 
2366     // Check if we're initializing and the thread initializing is the one
2367     // executing the code.
2368     __ Cmp(temp2, kShiftedInitializingValue);
2369     __ B(lo, &resolution);
2370 
2371     __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
2372     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
2373     __ Cmp(temp1, temp2);
2374     __ B(eq, &frame_entry_label_);
2375     __ Bind(&resolution);
2376 
2377     // Jump to the resolution stub.
2378     ThreadOffset32 entrypoint_offset =
2379         GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
2380     __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
2381     __ Bx(temp1);
2382 
2383     __ Bind(&memory_barrier);
2384     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2385   }
2386 
2387   __ Bind(&frame_entry_label_);
2388 
2389   if (HasEmptyFrame()) {
2390     // Ensure that the CFI opcode list is not empty.
2391     GetAssembler()->cfi().Nop();
2392     MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2393     return;
2394   }
2395 
2396   // Make sure the frame size isn't unreasonably large.
2397   DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
2398 
2399   if (!skip_overflow_check) {
2400     // Using r4 instead of IP saves 2 bytes.
2401     UseScratchRegisterScope temps(GetVIXLAssembler());
2402     vixl32::Register temp;
2403     // TODO: Remove this check when R4 is made a callee-save register
2404     // in ART compiled code (b/72801708). Currently we need to make
2405     // sure r4 is not blocked, e.g. in special purpose
2406     // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2407     // here.
2408     if (!blocked_core_registers_[R4]) {
2409       for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2410         DCHECK(!reg.Is(r4));
2411       }
2412       DCHECK(!kCoreCalleeSaves.Includes(r4));
2413       temp = r4;
2414     } else {
2415       temp = temps.Acquire();
2416     }
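    // Implicit stack overflow check: probe a load at SP minus the reserved
    // region. If that address falls in the stack guard page the load faults,
    // and the fault handler (using the PC info recorded below) converts the
    // fault into a StackOverflowError.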
2417     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2418     // The load must immediately precede RecordPcInfo.
2419     ExactAssemblyScope aas(GetVIXLAssembler(),
2420                            vixl32::kMaxInstructionSizeInBytes,
2421                            CodeBufferCheckScope::kMaximumSize);
2422     __ ldr(temp, MemOperand(temp));
2423     RecordPcInfo(nullptr, 0);
2424   }
2425 
2426   uint32_t frame_size = GetFrameSize();
2427   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2428   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2429   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2430       core_spills_offset <= 3u * kArmWordSize) {
2431     // Do a single PUSH for core registers including the method and up to two
2432     // filler registers. Then store the single FP spill if any.
2433     // (The worst case is when the method is not required and we actually
2434     // store 3 extra registers but they are stored in the same properly
2435     // aligned 16-byte chunk where we're already writing anyway.)
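    // Illustrative example (assumed masks): with core_spill_mask_ = {r5, r6, r7, lr}
    // and core_spills_offset = 8, extra_regs becomes {r0, r1} and a single
    // PUSH {r0, r1, r5, r6, r7, lr} sets up the whole 24-byte frame, with the
    // ArtMethod* (r0) stored at SP + 0.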
2436     DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2437     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2438     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2439     __ Push(RegisterList(core_spill_mask_ | extra_regs));
2440     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2441     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2442                                            core_spills_offset,
2443                                            core_spill_mask_,
2444                                            kArmWordSize);
2445     if (fpu_spill_mask_ != 0u) {
2446       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2447       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2448       GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2449       GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2450     }
2451   } else {
2452     __ Push(RegisterList(core_spill_mask_));
2453     GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2454     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2455                                            /*offset=*/ 0,
2456                                            core_spill_mask_,
2457                                            kArmWordSize);
2458     if (fpu_spill_mask_ != 0) {
2459       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2460 
2461       // Check that list is contiguous.
2462       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
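      // E.g. fpu_spill_mask_ == 0b0011110000 passes: shifting out the trailing zeros gives
      // 0b1111, which equals ~0u >> (32 - 4); a mask with a hole, such as 0b0011010000,
      // would fail this check.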
2463 
2464       __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2465       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2466       GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2467                                              /*offset=*/ 0,
2468                                              fpu_spill_mask_,
2469                                              kArmWordSize);
2470     }
2471 
2472     // Adjust SP and save the current method if we need it. Note that we do
2473     // not save the method in HCurrentMethod, as the instruction might have
2474     // been removed in the SSA graph.
2475     if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2476       DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2477       __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2478       GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2479     } else {
2480       IncreaseFrame(fp_spills_offset);
2481       if (RequiresCurrentMethod()) {
2482         GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2483       }
2484     }
2485   }
2486 
2487   if (GetGraph()->HasShouldDeoptimizeFlag()) {
2488     UseScratchRegisterScope temps(GetVIXLAssembler());
2489     vixl32::Register temp = temps.Acquire();
2490     // Initialize should_deoptimize flag to 0.
2491     __ Mov(temp, 0);
2492     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2493   }
2494 
2495   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
2496   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2497 }
2498 
GenerateFrameExit()2499 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2500   if (HasEmptyFrame()) {
2501     __ Bx(lr);
2502     return;
2503   }
2504 
2505   // Pop LR into PC to return.
2506   DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2507   uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
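  // For example, if core_spill_mask_ were {r5, r6, r7, lr}, pop_mask would be
  // {r5, r6, r7, pc}, so the final POP both restores the callee saves and returns.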
2508 
2509   uint32_t frame_size = GetFrameSize();
2510   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2511   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2512   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2513       // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2514       core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2515     // Load the FP spill if any and then do a single POP including the method
2516     // and up to two filler registers. If we have no FP spills, this also has
2517     // the advantage that we do not need to emit CFI directives.
2518     if (fpu_spill_mask_ != 0u) {
2519       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2520       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2521       GetAssembler()->cfi().RememberState();
2522       GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2523       GetAssembler()->cfi().Restore(DWARFReg(sreg));
2524     }
2525     // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2526     // never hold the return value.
2527     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
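    // For example, with core_spills_offset == 8 this is the mask {r2, r3}; these registers
    // merely absorb the filler slots and their popped values are ignored.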
2528     DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2529     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2530     __ Pop(RegisterList(pop_mask | extra_regs));
2531     if (fpu_spill_mask_ != 0u) {
2532       GetAssembler()->cfi().RestoreState();
2533     }
2534   } else {
2535     GetAssembler()->cfi().RememberState();
2536     DecreaseFrame(fp_spills_offset);
2537     if (fpu_spill_mask_ != 0) {
2538       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2539 
2540       // Check that list is contiguous.
2541       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2542 
2543       __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2544       GetAssembler()->cfi().AdjustCFAOffset(
2545           -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2546       GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2547     }
2548     __ Pop(RegisterList(pop_mask));
2549     GetAssembler()->cfi().RestoreState();
2550     GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2551   }
2552 }
2553 
Bind(HBasicBlock * block)2554 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2555   __ Bind(GetLabelOf(block));
2556 }
2557 
GetNextLocation(DataType::Type type)2558 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2559   switch (type) {
2560     case DataType::Type::kReference:
2561     case DataType::Type::kBool:
2562     case DataType::Type::kUint8:
2563     case DataType::Type::kInt8:
2564     case DataType::Type::kUint16:
2565     case DataType::Type::kInt16:
2566     case DataType::Type::kInt32: {
2567       uint32_t index = gp_index_++;
2568       uint32_t stack_index = stack_index_++;
2569       if (index < calling_convention.GetNumberOfRegisters()) {
2570         return LocationFrom(calling_convention.GetRegisterAt(index));
2571       } else {
2572         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2573       }
2574     }
2575 
2576     case DataType::Type::kInt64: {
2577       uint32_t index = gp_index_;
2578       uint32_t stack_index = stack_index_;
2579       gp_index_ += 2;
2580       stack_index_ += 2;
2581       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2582         if (calling_convention.GetRegisterAt(index).Is(r1)) {
2583           // Skip R1, and use R2_R3 instead.
2584           gp_index_++;
2585           index++;
2586         }
2587       }
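      // For example, if a long is the first argument, r1 is skipped and the value lands in
      // the aligned pair r2/r3; r1 is simply left unused.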
2588       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2589         DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2590                   calling_convention.GetRegisterAt(index + 1).GetCode());
2591 
2592         return LocationFrom(calling_convention.GetRegisterAt(index),
2593                             calling_convention.GetRegisterAt(index + 1));
2594       } else {
2595         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2596       }
2597     }
2598 
2599     case DataType::Type::kFloat32: {
2600       uint32_t stack_index = stack_index_++;
2601       if (float_index_ % 2 == 0) {
2602         float_index_ = std::max(double_index_, float_index_);
2603       }
2604       if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2605         return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2606       } else {
2607         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2608       }
2609     }
2610 
2611     case DataType::Type::kFloat64: {
2612       double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2613       uint32_t stack_index = stack_index_;
2614       stack_index_ += 2;
2615       if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2616         uint32_t index = double_index_;
2617         double_index_ += 2;
2618         Location result = LocationFrom(
2619           calling_convention.GetFpuRegisterAt(index),
2620           calling_convention.GetFpuRegisterAt(index + 1));
2621         DCHECK(ExpectedPairLayout(result));
2622         return result;
2623       } else {
2624         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2625       }
2626     }
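    // Note on the float/double interplay above: for an (F32, F64, F32) signature the first
    // float takes s0, the double takes the aligned pair s2/s3, and the second float
    // back-fills s1 because float_index_ is odd and is not re-synchronized with double_index_.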
2627 
2628     case DataType::Type::kUint32:
2629     case DataType::Type::kUint64:
2630     case DataType::Type::kVoid:
2631       LOG(FATAL) << "Unexpected parameter type " << type;
2632       UNREACHABLE();
2633   }
2634   return Location::NoLocation();
2635 }
2636 
GetReturnLocation(DataType::Type type) const2637 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2638   switch (type) {
2639     case DataType::Type::kReference:
2640     case DataType::Type::kBool:
2641     case DataType::Type::kUint8:
2642     case DataType::Type::kInt8:
2643     case DataType::Type::kUint16:
2644     case DataType::Type::kInt16:
2645     case DataType::Type::kUint32:
2646     case DataType::Type::kInt32: {
2647       return LocationFrom(r0);
2648     }
2649 
2650     case DataType::Type::kFloat32: {
2651       return LocationFrom(s0);
2652     }
2653 
2654     case DataType::Type::kUint64:
2655     case DataType::Type::kInt64: {
2656       return LocationFrom(r0, r1);
2657     }
2658 
2659     case DataType::Type::kFloat64: {
2660       return LocationFrom(s0, s1);
2661     }
2662 
2663     case DataType::Type::kVoid:
2664       return Location::NoLocation();
2665   }
2666 
2667   UNREACHABLE();
2668 }
2669 
GetMethodLocation() const2670 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2671   return LocationFrom(kMethodRegister);
2672 }
2673 
GetNextLocation(DataType::Type type)2674 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2675   DCHECK_NE(type, DataType::Type::kReference);
2676 
2677   // Native ABI uses the same registers as managed, except that the method register r0
2678   // is a normal argument.
2679   Location location = Location::NoLocation();
2680   if (DataType::Is64BitType(type)) {
2681     gpr_index_ = RoundUp(gpr_index_, 2u);
2682     stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2683     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2684       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2685                               kParameterCoreRegistersVIXL[gpr_index_]);
2686       gpr_index_ += 2u;
2687     }
2688   } else {
2689     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2690       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2691       ++gpr_index_;
2692     }
2693   }
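  // For example, a native (jlong, jint) signature places the long in the aligned pair r0/r1
  // and the int in r2; arguments that do not fit in r0-r3 fall through to the stack
  // handling below.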
2694   if (location.IsInvalid()) {
2695     if (DataType::Is64BitType(type)) {
2696       location = Location::DoubleStackSlot(stack_offset_);
2697       stack_offset_ += 2 * kFramePointerSize;
2698     } else {
2699       location = Location::StackSlot(stack_offset_);
2700       stack_offset_ += kFramePointerSize;
2701     }
2702 
2703     if (for_register_allocation_) {
2704       location = Location::Any();
2705     }
2706   }
2707   return location;
2708 }
2709 
GetReturnLocation(DataType::Type type) const2710 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2711     const {
2712   // We perform conversion to the managed ABI return register after the call if needed.
2713   InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2714   return dex_calling_convention.GetReturnLocation(type);
2715 }
2716 
GetMethodLocation() const2717 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2718   // Pass the method in the hidden argument R4.
2719   return Location::RegisterLocation(R4);
2720 }
2721 
Move32(Location destination,Location source)2722 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2723   if (source.Equals(destination)) {
2724     return;
2725   }
2726   if (destination.IsRegister()) {
2727     if (source.IsRegister()) {
2728       __ Mov(RegisterFrom(destination), RegisterFrom(source));
2729     } else if (source.IsFpuRegister()) {
2730       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2731     } else {
2732       GetAssembler()->LoadFromOffset(kLoadWord,
2733                                      RegisterFrom(destination),
2734                                      sp,
2735                                      source.GetStackIndex());
2736     }
2737   } else if (destination.IsFpuRegister()) {
2738     if (source.IsRegister()) {
2739       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2740     } else if (source.IsFpuRegister()) {
2741       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2742     } else {
2743       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2744     }
2745   } else {
2746     DCHECK(destination.IsStackSlot()) << destination;
2747     if (source.IsRegister()) {
2748       GetAssembler()->StoreToOffset(kStoreWord,
2749                                     RegisterFrom(source),
2750                                     sp,
2751                                     destination.GetStackIndex());
2752     } else if (source.IsFpuRegister()) {
2753       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2754     } else {
2755       DCHECK(source.IsStackSlot()) << source;
2756       UseScratchRegisterScope temps(GetVIXLAssembler());
2757       vixl32::Register temp = temps.Acquire();
2758       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2759       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2760     }
2761   }
2762 }
2763 
MoveConstant(Location location,int32_t value)2764 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2765   DCHECK(location.IsRegister());
2766   __ Mov(RegisterFrom(location), value);
2767 }
2768 
MoveLocation(Location dst,Location src,DataType::Type dst_type)2769 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2770   // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2771   // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2772   HParallelMove move(GetGraph()->GetAllocator());
2773   move.AddMove(src, dst, dst_type, nullptr);
2774   GetMoveResolver()->EmitNativeCode(&move);
2775 }
2776 
AddLocationAsTemp(Location location,LocationSummary * locations)2777 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2778   if (location.IsRegister()) {
2779     locations->AddTemp(location);
2780   } else if (location.IsRegisterPair()) {
2781     locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2782     locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2783   } else {
2784     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2785   }
2786 }
2787 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)2788 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2789                                          HInstruction* instruction,
2790                                          uint32_t dex_pc,
2791                                          SlowPathCode* slow_path) {
2792   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2793 
2794   ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2795   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2796   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2797   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2798   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2799     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2800     // Ensure the pc position is recorded immediately after the `blx` instruction.
2801     // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
2802     ExactAssemblyScope aas(GetVIXLAssembler(),
2803                            vixl32::k16BitT32InstructionSizeInBytes,
2804                            CodeBufferCheckScope::kExactSize);
2805     __ blx(lr);
2806     if (EntrypointRequiresStackMap(entrypoint)) {
2807       RecordPcInfo(instruction, dex_pc, slow_path);
2808     }
2809   } else {
2810     // Ensure the pc position is recorded immediately after the `bl` instruction.
2811     ExactAssemblyScope aas(GetVIXLAssembler(),
2812                            vixl32::k32BitT32InstructionSizeInBytes,
2813                            CodeBufferCheckScope::kExactSize);
2814     EmitEntrypointThunkCall(entrypoint_offset);
2815     if (EntrypointRequiresStackMap(entrypoint)) {
2816       RecordPcInfo(instruction, dex_pc, slow_path);
2817     }
2818   }
2819 }
2820 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)2821 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2822                                                                HInstruction* instruction,
2823                                                                SlowPathCode* slow_path) {
2824   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2825   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2826   __ Blx(lr);
2827 }
2828 
HandleGoto(HInstruction * got,HBasicBlock * successor)2829 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2830   if (successor->IsExitBlock()) {
2831     DCHECK(got->GetPrevious()->AlwaysThrows());
2832     return;  // no code needed
2833   }
2834 
2835   HBasicBlock* block = got->GetBlock();
2836   HInstruction* previous = got->GetPrevious();
2837   HLoopInformation* info = block->GetLoopInformation();
2838 
2839   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2840     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2841     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2842     return;
2843   }
2844   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2845     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2846     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2847   }
2848   if (!codegen_->GoesToNextBlock(block, successor)) {
2849     __ B(codegen_->GetLabelOf(successor));
2850   }
2851 }
2852 
VisitGoto(HGoto * got)2853 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2854   got->SetLocations(nullptr);
2855 }
2856 
VisitGoto(HGoto * got)2857 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2858   HandleGoto(got, got->GetSuccessor());
2859 }
2860 
VisitTryBoundary(HTryBoundary * try_boundary)2861 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2862   try_boundary->SetLocations(nullptr);
2863 }
2864 
VisitTryBoundary(HTryBoundary * try_boundary)2865 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2866   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2867   if (!successor->IsExitBlock()) {
2868     HandleGoto(try_boundary, successor);
2869   }
2870 }
2871 
VisitExit(HExit * exit)2872 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2873   exit->SetLocations(nullptr);
2874 }
2875 
VisitExit(HExit * exit)2876 void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
2877 
GenerateCompareTestAndBranch(HCondition * condition,vixl32::Label * true_target,vixl32::Label * false_target,bool is_far_target)2878 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2879                                                                    vixl32::Label* true_target,
2880                                                                    vixl32::Label* false_target,
2881                                                                    bool is_far_target) {
2882   if (true_target == false_target) {
2883     DCHECK(true_target != nullptr);
2884     __ B(true_target);
2885     return;
2886   }
2887 
2888   vixl32::Label* non_fallthrough_target;
2889   bool invert;
2890   bool emit_both_branches;
2891 
2892   if (true_target == nullptr) {
2893     // The true target is fallthrough.
2894     DCHECK(false_target != nullptr);
2895     non_fallthrough_target = false_target;
2896     invert = true;
2897     emit_both_branches = false;
2898   } else {
2899     non_fallthrough_target = true_target;
2900     invert = false;
2901     // Either the false target is fallthrough, or there is no fallthrough
2902     // and both branches must be emitted.
2903     emit_both_branches = (false_target != nullptr);
2904   }
2905 
2906   const auto cond = GenerateTest(condition, invert, codegen_);
2907 
2908   __ B(cond.first, non_fallthrough_target, is_far_target);
2909 
2910   if (emit_both_branches) {
2911     // No target falls through, so we need to branch.
2912     __ B(false_target);
2913   }
2914 }
2915 
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl32::Label * true_target,vixl32::Label * false_target,bool far_target)2916 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2917                                                             size_t condition_input_index,
2918                                                             vixl32::Label* true_target,
2919                                                             vixl32::Label* false_target,
2920                                                             bool far_target) {
2921   HInstruction* cond = instruction->InputAt(condition_input_index);
2922 
2923   if (true_target == nullptr && false_target == nullptr) {
2924     // Nothing to do. The code always falls through.
2925     return;
2926   } else if (cond->IsIntConstant()) {
2927     // Constant condition, statically compared against "true" (integer value 1).
2928     if (cond->AsIntConstant()->IsTrue()) {
2929       if (true_target != nullptr) {
2930         __ B(true_target);
2931       }
2932     } else {
2933       DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2934       if (false_target != nullptr) {
2935         __ B(false_target);
2936       }
2937     }
2938     return;
2939   }
2940 
2941   // The following code generates these patterns:
2942   //  (1) true_target == nullptr && false_target != nullptr
2943   //        - opposite condition true => branch to false_target
2944   //  (2) true_target != nullptr && false_target == nullptr
2945   //        - condition true => branch to true_target
2946   //  (3) true_target != nullptr && false_target != nullptr
2947   //        - condition true => branch to true_target
2948   //        - branch to false_target
2949   if (IsBooleanValueOrMaterializedCondition(cond)) {
2950     // Condition has been materialized, compare the output to 0.
2951     if (kIsDebugBuild) {
2952       Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2953       DCHECK(cond_val.IsRegister());
2954     }
2955     if (true_target == nullptr) {
2956       __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2957                                 false_target,
2958                                 far_target);
2959     } else {
2960       __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2961                                    true_target,
2962                                    far_target);
2963     }
2964   } else {
2965     // Condition has not been materialized. Use its inputs as the comparison and
2966     // its condition as the branch condition.
2967     HCondition* condition = cond->AsCondition();
2968 
2969     // If this is a long or FP comparison that has been folded into
2970     // the HCondition, generate the comparison directly.
2971     DataType::Type type = condition->InputAt(0)->GetType();
2972     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2973       GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2974       return;
2975     }
2976 
2977     vixl32::Label* non_fallthrough_target;
2978     vixl32::Condition arm_cond = vixl32::Condition::None();
2979     const vixl32::Register left = InputRegisterAt(cond, 0);
2980     const Operand right = InputOperandAt(cond, 1);
2981 
2982     if (true_target == nullptr) {
2983       arm_cond = ARMCondition(condition->GetOppositeCondition());
2984       non_fallthrough_target = false_target;
2985     } else {
2986       arm_cond = ARMCondition(condition->GetCondition());
2987       non_fallthrough_target = true_target;
2988     }
2989 
2990     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2991       if (arm_cond.Is(eq)) {
2992         __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2993       } else {
2994         DCHECK(arm_cond.Is(ne));
2995         __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2996       }
2997     } else {
2998       __ Cmp(left, right);
2999       __ B(arm_cond, non_fallthrough_target, far_target);
3000     }
3001   }
3002 
3003   // If neither branch falls through (case 3), the conditional branch to `true_target`
3004   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3005   if (true_target != nullptr && false_target != nullptr) {
3006     __ B(false_target);
3007   }
3008 }
3009 
VisitIf(HIf * if_instr)3010 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
3011   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3012   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3013     locations->SetInAt(0, Location::RequiresRegister());
3014     if (GetGraph()->IsCompilingBaseline() &&
3015         codegen_->GetCompilerOptions().ProfileBranches() &&
3016         !Runtime::Current()->IsAotCompiler()) {
3017       locations->AddTemp(Location::RequiresRegister());
3018     }
3019   }
3020 }
3021 
VisitIf(HIf * if_instr)3022 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
3023   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3024   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3025   vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
3026       nullptr : codegen_->GetLabelOf(true_successor);
3027   vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
3028       nullptr : codegen_->GetLabelOf(false_successor);
3029   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3030     if (GetGraph()->IsCompilingBaseline() &&
3031         codegen_->GetCompilerOptions().ProfileBranches() &&
3032         !Runtime::Current()->IsAotCompiler()) {
3033       DCHECK(if_instr->InputAt(0)->IsCondition());
3034       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3035       DCHECK(info != nullptr);
3036       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
3037       // Currently, not all If branches are profiled.
3038       if (cache != nullptr) {
3039         uint32_t address =
3040             reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
3041         static_assert(
3042             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
3043             "Unexpected offsets for BranchCache");
3044         vixl32::Label done;
3045         UseScratchRegisterScope temps(GetVIXLAssembler());
3046         vixl32::Register temp = temps.Acquire();
3047         vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
3048         vixl32::Register condition = InputRegisterAt(if_instr, 0);
3049         __ Mov(temp, address);
3050         __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
3051         __ Adds(counter, counter, 1);
3052         __ Uxth(counter, counter);
3053         __ CompareAndBranchIfZero(counter, &done);
3054         __ Strh(counter, MemOperand(temp, condition, LSL, 1));
3055         __ Bind(&done);
3056       }
3057     }
3058   }
3059   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3060 }
3061 
VisitDeoptimize(HDeoptimize * deoptimize)3062 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3063   LocationSummary* locations = new (GetGraph()->GetAllocator())
3064       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3065   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3066   RegisterSet caller_saves = RegisterSet::Empty();
3067   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
3068   locations->SetCustomSlowPathCallerSaves(caller_saves);
3069   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3070     locations->SetInAt(0, Location::RequiresRegister());
3071   }
3072 }
3073 
VisitDeoptimize(HDeoptimize * deoptimize)3074 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
3075   SlowPathCodeARMVIXL* slow_path =
3076       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
3077   GenerateTestAndBranch(deoptimize,
3078                         /* condition_input_index= */ 0,
3079                         slow_path->GetEntryLabel(),
3080                         /* false_target= */ nullptr);
3081 }
3082 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3083 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3084   LocationSummary* locations = new (GetGraph()->GetAllocator())
3085       LocationSummary(flag, LocationSummary::kNoCall);
3086   locations->SetOut(Location::RequiresRegister());
3087 }
3088 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3089 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3090   GetAssembler()->LoadFromOffset(kLoadWord,
3091                                  OutputRegister(flag),
3092                                  sp,
3093                                  codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
3094 }
3095 
VisitSelect(HSelect * select)3096 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
3097   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3098   const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
3099 
3100   if (is_floating_point) {
3101     locations->SetInAt(0, Location::RequiresFpuRegister());
3102     locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
3103   } else {
3104     locations->SetInAt(0, Location::RequiresRegister());
3105     locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
3106   }
3107 
3108   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3109     locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
3110     // The code generator handles overlap with the values, but not with the condition.
3111     locations->SetOut(Location::SameAsFirstInput());
3112   } else if (is_floating_point) {
3113     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3114   } else {
3115     if (!locations->InAt(1).IsConstant()) {
3116       locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
3117     }
3118 
3119     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3120   }
3121 }
3122 
VisitSelect(HSelect * select)3123 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
3124   HInstruction* const condition = select->GetCondition();
3125   const LocationSummary* const locations = select->GetLocations();
3126   const DataType::Type type = select->GetType();
3127   const Location first = locations->InAt(0);
3128   const Location out = locations->Out();
3129   const Location second = locations->InAt(1);
3130 
3131   // In the unlucky case where the output of this instruction overlaps
3132   // with an input of an "emitted-at-use-site" condition, and the output
3133   // is not one of this instruction's own inputs, we'll need to fall back
3134   // to branches instead of conditional ARM instructions.
3135   bool output_overlaps_with_condition_inputs =
3136       !IsBooleanValueOrMaterializedCondition(condition) &&
3137       !out.Equals(first) &&
3138       !out.Equals(second) &&
3139       (condition->GetLocations()->InAt(0).Equals(out) ||
3140        condition->GetLocations()->InAt(1).Equals(out));
3141   DCHECK_IMPLIES(output_overlaps_with_condition_inputs, condition->IsCondition());
3142   Location src;
3143 
3144   if (condition->IsIntConstant()) {
3145     if (condition->AsIntConstant()->IsFalse()) {
3146       src = first;
3147     } else {
3148       src = second;
3149     }
3150 
3151     codegen_->MoveLocation(out, src, type);
3152     return;
3153   }
3154 
3155   if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
3156     bool invert = false;
3157 
3158     if (out.Equals(second)) {
3159       src = first;
3160       invert = true;
3161     } else if (out.Equals(first)) {
3162       src = second;
3163     } else if (second.IsConstant()) {
3164       DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
3165       src = second;
3166     } else if (first.IsConstant()) {
3167       DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
3168       src = first;
3169       invert = true;
3170     } else {
3171       src = second;
3172     }
3173 
3174     if (CanGenerateConditionalMove(out, src)) {
3175       if (!out.Equals(first) && !out.Equals(second)) {
3176         codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
3177       }
3178 
3179       std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
3180 
3181       if (IsBooleanValueOrMaterializedCondition(condition)) {
3182         __ Cmp(InputRegisterAt(select, 2), 0);
3183         cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
3184       } else {
3185         cond = GenerateTest(condition->AsCondition(), invert, codegen_);
3186       }
3187 
3188       const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
3189       // We use the scope because of the IT block that follows.
3190       ExactAssemblyScope guard(GetVIXLAssembler(),
3191                                instr_count * vixl32::k16BitT32InstructionSizeInBytes,
3192                                CodeBufferCheckScope::kExactSize);
3193 
3194       if (out.IsRegister()) {
3195         __ it(cond.first);
3196         __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
3197       } else {
3198         DCHECK(out.IsRegisterPair());
3199 
3200         Operand operand_high(0);
3201         Operand operand_low(0);
3202 
3203         if (src.IsConstant()) {
3204           const int64_t value = Int64ConstantFrom(src);
3205 
3206           operand_high = High32Bits(value);
3207           operand_low = Low32Bits(value);
3208         } else {
3209           DCHECK(src.IsRegisterPair());
3210           operand_high = HighRegisterFrom(src);
3211           operand_low = LowRegisterFrom(src);
3212         }
3213 
3214         __ it(cond.first);
3215         __ mov(cond.first, LowRegisterFrom(out), operand_low);
3216         __ it(cond.first);
3217         __ mov(cond.first, HighRegisterFrom(out), operand_high);
3218       }
3219 
3220       return;
3221     }
3222   }
3223 
3224   vixl32::Label* false_target = nullptr;
3225   vixl32::Label* true_target = nullptr;
3226   vixl32::Label select_end;
3227   vixl32::Label other_case;
3228   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
3229 
3230   if (out.Equals(second)) {
3231     true_target = target;
3232     src = first;
3233   } else {
3234     false_target = target;
3235     src = second;
3236 
3237     if (!out.Equals(first)) {
3238       if (output_overlaps_with_condition_inputs) {
3239         false_target = &other_case;
3240       } else {
3241         codegen_->MoveLocation(out, first, type);
3242       }
3243     }
3244   }
3245 
3246   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3247   codegen_->MoveLocation(out, src, type);
3248   if (output_overlaps_with_condition_inputs) {
3249     __ B(target);
3250     __ Bind(&other_case);
3251     codegen_->MoveLocation(out, first, type);
3252   }
3253 
3254   if (select_end.IsReferenced()) {
3255     __ Bind(&select_end);
3256   }
3257 }
3258 
VisitNop(HNop * nop)3259 void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
3260   new (GetGraph()->GetAllocator()) LocationSummary(nop);
3261 }
3262 
VisitNop(HNop *)3263 void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
3264   // The environment recording already happened in CodeGenerator::Compile.
3265 }
3266 
IncreaseFrame(size_t adjustment)3267 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3268   __ Claim(adjustment);
3269   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3270 }
3271 
DecreaseFrame(size_t adjustment)3272 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3273   __ Drop(adjustment);
3274   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3275 }
3276 
GenerateNop()3277 void CodeGeneratorARMVIXL::GenerateNop() {
3278   __ Nop();
3279 }
3280 
3281 // `temp` is an extra temporary register that is used for some conditions;
3282 // callers may not specify it, in which case the method will use a scratch
3283 // register instead.
GenerateConditionWithZero(IfCondition condition,vixl32::Register out,vixl32::Register in,vixl32::Register temp)3284 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3285                                                      vixl32::Register out,
3286                                                      vixl32::Register in,
3287                                                      vixl32::Register temp) {
3288   switch (condition) {
3289     case kCondEQ:
3290     // x <= 0 iff x == 0 when the comparison is unsigned.
3291     case kCondBE:
3292       if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3293         temp = out;
3294       }
3295 
3296       // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3297       // different as well.
3298       if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3299         // temp = - in; only 0 sets the carry flag.
3300         __ Rsbs(temp, in, 0);
3301 
3302         if (out.Is(in)) {
3303           std::swap(in, temp);
3304         }
3305 
3306         // out = - in + in + carry = carry
3307         __ Adc(out, temp, in);
3308       } else {
3309         // If `in` is 0, then it has 32 leading zeros, and fewer than that otherwise.
3310         __ Clz(out, in);
3311         // Any number less than 32 logically shifted right by 5 bits results in 0;
3312         // the same operation on 32 yields 1.
3313         __ Lsr(out, out, 5);
3314       }
3315 
3316       break;
3317     case kCondNE:
3318     // x > 0 iff x != 0 when the comparison is unsigned.
3319     case kCondA: {
3320       UseScratchRegisterScope temps(GetVIXLAssembler());
3321 
3322       if (out.Is(in)) {
3323         if (!temp.IsValid() || in.Is(temp)) {
3324           temp = temps.Acquire();
3325         }
3326       } else if (!temp.IsValid() || !temp.IsLow()) {
3327         temp = out;
3328       }
3329 
3330       // temp = in - 1; only 0 does not set the carry flag.
3331       __ Subs(temp, in, 1);
3332       // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3333       __ Sbc(out, in, temp);
3334       break;
3335     }
3336     case kCondGE:
3337       __ Mvn(out, in);
3338       in = out;
3339       FALLTHROUGH_INTENDED;
3340     case kCondLT:
3341       // We only care about the sign bit.
3342       __ Lsr(out, in, 31);
3343       break;
3344     case kCondAE:
3345       // Trivially true.
3346       __ Mov(out, 1);
3347       break;
3348     case kCondB:
3349       // Trivially false.
3350       __ Mov(out, 0);
3351       break;
3352     default:
3353       LOG(FATAL) << "Unexpected condition " << condition;
3354       UNREACHABLE();
3355   }
3356 }
3357 
HandleCondition(HCondition * cond)3358 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3359   LocationSummary* locations =
3360       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3361   const DataType::Type type = cond->InputAt(0)->GetType();
3362   if (DataType::IsFloatingPointType(type)) {
3363     locations->SetInAt(0, Location::RequiresFpuRegister());
3364     locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3365   } else {
3366     locations->SetInAt(0, Location::RequiresRegister());
3367     locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3368   }
3369   if (!cond->IsEmittedAtUseSite()) {
3370     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3371   }
3372 }
3373 
HandleCondition(HCondition * cond)3374 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3375   if (cond->IsEmittedAtUseSite()) {
3376     return;
3377   }
3378 
3379   const DataType::Type type = cond->GetLeft()->GetType();
3380 
3381   if (DataType::IsFloatingPointType(type)) {
3382     GenerateConditionGeneric(cond, codegen_);
3383     return;
3384   }
3385 
3386   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3387 
3388   const IfCondition condition = cond->GetCondition();
3389 
3390   // A condition with only one boolean input, or with two boolean inputs compared for something
3391   // other than equality or inequality, results from transformations done by the instruction
3392   // simplifier and is handled as a regular condition with integral inputs.
3393   if (type == DataType::Type::kBool &&
3394       cond->GetRight()->GetType() == DataType::Type::kBool &&
3395       (condition == kCondEQ || condition == kCondNE)) {
3396     vixl32::Register left = InputRegisterAt(cond, 0);
3397     const vixl32::Register out = OutputRegister(cond);
3398     const Location right_loc = cond->GetLocations()->InAt(1);
3399 
3400     // The constant case is handled by the instruction simplifier.
3401     DCHECK(!right_loc.IsConstant());
3402 
3403     vixl32::Register right = RegisterFrom(right_loc);
3404 
3405     // Avoid 32-bit instructions if possible.
3406     if (out.Is(right)) {
3407       std::swap(left, right);
3408     }
3409 
3410     __ Eor(out, left, right);
3411 
3412     if (condition == kCondEQ) {
3413       __ Eor(out, out, 1);
3414     }
3415 
3416     return;
3417   }
3418 
3419   GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3420 }
3421 
VisitEqual(HEqual * comp)3422 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3423   HandleCondition(comp);
3424 }
3425 
VisitEqual(HEqual * comp)3426 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3427   HandleCondition(comp);
3428 }
3429 
VisitNotEqual(HNotEqual * comp)3430 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3431   HandleCondition(comp);
3432 }
3433 
VisitNotEqual(HNotEqual * comp)3434 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3435   HandleCondition(comp);
3436 }
3437 
VisitLessThan(HLessThan * comp)3438 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3439   HandleCondition(comp);
3440 }
3441 
VisitLessThan(HLessThan * comp)3442 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3443   HandleCondition(comp);
3444 }
3445 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3446 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3447   HandleCondition(comp);
3448 }
3449 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3450 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3451   HandleCondition(comp);
3452 }
3453 
VisitGreaterThan(HGreaterThan * comp)3454 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3455   HandleCondition(comp);
3456 }
3457 
VisitGreaterThan(HGreaterThan * comp)3458 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3459   HandleCondition(comp);
3460 }
3461 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3462 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3463   HandleCondition(comp);
3464 }
3465 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3466 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3467   HandleCondition(comp);
3468 }
3469 
VisitBelow(HBelow * comp)3470 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3471   HandleCondition(comp);
3472 }
3473 
VisitBelow(HBelow * comp)3474 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3475   HandleCondition(comp);
3476 }
3477 
VisitBelowOrEqual(HBelowOrEqual * comp)3478 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3479   HandleCondition(comp);
3480 }
3481 
VisitBelowOrEqual(HBelowOrEqual * comp)3482 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3483   HandleCondition(comp);
3484 }
3485 
VisitAbove(HAbove * comp)3486 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3487   HandleCondition(comp);
3488 }
3489 
VisitAbove(HAbove * comp)3490 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3491   HandleCondition(comp);
3492 }
3493 
VisitAboveOrEqual(HAboveOrEqual * comp)3494 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3495   HandleCondition(comp);
3496 }
3497 
VisitAboveOrEqual(HAboveOrEqual * comp)3498 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3499   HandleCondition(comp);
3500 }
3501 
VisitIntConstant(HIntConstant * constant)3502 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3503   LocationSummary* locations =
3504       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3505   locations->SetOut(Location::ConstantLocation(constant));
3506 }
3507 
VisitIntConstant(HIntConstant * constant)3508 void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
3509   // Will be generated at use site.
3510 }
3511 
VisitNullConstant(HNullConstant * constant)3512 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3513   LocationSummary* locations =
3514       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3515   locations->SetOut(Location::ConstantLocation(constant));
3516 }
3517 
VisitNullConstant(HNullConstant * constant)3518 void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
3519   // Will be generated at use site.
3520 }
3521 
VisitLongConstant(HLongConstant * constant)3522 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3523   LocationSummary* locations =
3524       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3525   locations->SetOut(Location::ConstantLocation(constant));
3526 }
3527 
VisitLongConstant(HLongConstant * constant)3528 void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
3529   // Will be generated at use site.
3530 }
3531 
VisitFloatConstant(HFloatConstant * constant)3532 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3533   LocationSummary* locations =
3534       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3535   locations->SetOut(Location::ConstantLocation(constant));
3536 }
3537 
VisitFloatConstant(HFloatConstant * constant)3538 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3539     [[maybe_unused]] HFloatConstant* constant) {
3540   // Will be generated at use site.
3541 }
3542 
VisitDoubleConstant(HDoubleConstant * constant)3543 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3544   LocationSummary* locations =
3545       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3546   locations->SetOut(Location::ConstantLocation(constant));
3547 }
3548 
VisitDoubleConstant(HDoubleConstant * constant)3549 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3550     [[maybe_unused]] HDoubleConstant* constant) {
3551   // Will be generated at use site.
3552 }
3553 
VisitConstructorFence(HConstructorFence * constructor_fence)3554 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3555   constructor_fence->SetLocations(nullptr);
3556 }
3557 
VisitConstructorFence(HConstructorFence * constructor_fence)3558 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3559     [[maybe_unused]] HConstructorFence* constructor_fence) {
3560   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3561 }
3562 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3563 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3564   memory_barrier->SetLocations(nullptr);
3565 }
3566 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3567 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3568   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3569 }
3570 
VisitReturnVoid(HReturnVoid * ret)3571 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3572   ret->SetLocations(nullptr);
3573 }
3574 
VisitReturnVoid(HReturnVoid * ret)3575 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
3576   codegen_->GenerateFrameExit();
3577 }
3578 
VisitReturn(HReturn * ret)3579 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3580   LocationSummary* locations =
3581       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3582   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3583 }
3584 
VisitReturn(HReturn * ret)3585 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3586   if (GetGraph()->IsCompilingOsr()) {
3587     // To simplify callers of an OSR method, we put the return value in both
3588     // floating point and core registers.
3589     switch (ret->InputAt(0)->GetType()) {
3590       case DataType::Type::kFloat32:
3591         __ Vmov(r0, s0);
3592         break;
3593       case DataType::Type::kFloat64:
3594         __ Vmov(r0, r1, d0);
3595         break;
3596       default:
3597         break;
3598     }
3599   }
3600   codegen_->GenerateFrameExit();
3601 }
3602 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3603 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3604   // The trampoline uses the same calling convention as the dex calling convention,
3605   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
3606   // contain the method_idx.
3607   HandleInvoke(invoke);
3608 }
3609 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3610 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3611   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3612   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3613 }
3614 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3615 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3616   // Explicit clinit checks triggered by static invokes must have been pruned by
3617   // art::PrepareForRegisterAllocation.
3618   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3619 
3620   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3621   if (intrinsic.TryDispatch(invoke)) {
3622     return;
3623   }
3624 
3625   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3626     CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3627         /*for_register_allocation=*/ true);
3628     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3629   } else {
3630     HandleInvoke(invoke);
3631   }
3632 }
3633 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3634 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3635   if (invoke->GetLocations()->Intrinsified()) {
3636     IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3637     intrinsic.Dispatch(invoke);
3638     return true;
3639   }
3640   return false;
3641 }
3642 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3643 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3644   // Explicit clinit checks triggered by static invokes must have been pruned by
3645   // art::PrepareForRegisterAllocation.
3646   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3647 
3648   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3649     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3650     return;
3651   }
3652 
3653   LocationSummary* locations = invoke->GetLocations();
3654   codegen_->GenerateStaticOrDirectCall(
3655       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3656 
3657   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3658 }
3659 
HandleInvoke(HInvoke * invoke)3660 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3661   InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3662   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3663 }
3664 
VisitInvokeVirtual(HInvokeVirtual * invoke)3665 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3666   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3667   if (intrinsic.TryDispatch(invoke)) {
3668     return;
3669   }
3670 
3671   HandleInvoke(invoke);
3672 }
3673 
VisitInvokeVirtual(HInvokeVirtual * invoke)3674 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3675   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3676     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3677     return;
3678   }
3679 
3680   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3681   DCHECK(!codegen_->IsLeafMethod());
3682 
3683   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3684 }
3685 
VisitInvokeInterface(HInvokeInterface * invoke)3686 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3687   HandleInvoke(invoke);
3688   // Add the hidden argument.
3689   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3690     // We cannot request r12 as it's blocked by the register allocator.
3691     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3692   }
3693 }
3694 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,vixl32::Register klass)3695 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3696                                                          vixl32::Register klass) {
3697   DCHECK_EQ(r0.GetCode(), klass.GetCode());
3698   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3699     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3700     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3701         info, GetCompilerOptions(), instruction->AsInvoke());
3702     if (cache != nullptr) {
3703       uint32_t address = reinterpret_cast32<uint32_t>(cache);
3704       vixl32::Label done;
3705       UseScratchRegisterScope temps(GetVIXLAssembler());
3706       temps.Exclude(ip);
3707       __ Mov(r4, address);
3708       __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3709       // Fast path for a monomorphic cache.
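           // Roughly, a sketch of the check emitted below (illustrative pseudo-code only):
           //   if (klass == first cached class) goto done;     // monomorphic hit, nothing to do
           //   call the kQuickUpdateInlineCache entrypoint;    // otherwise record `klass` in the cache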
3710       __ Cmp(klass, ip);
3711       __ B(eq, &done, /* is_far_target= */ false);
3712       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3713       __ Bind(&done);
3714     } else {
3715       // This is unexpected, but we don't guarantee stable compilation across
3716       // JIT runs, so just warn about it.
3717       ScopedObjectAccess soa(Thread::Current());
3718       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3719     }
3720   }
3721 }
3722 
VisitInvokeInterface(HInvokeInterface * invoke)3723 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3724   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3725   LocationSummary* locations = invoke->GetLocations();
3726   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3727   Location receiver = locations->InAt(0);
3728   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3729 
3730   DCHECK(!receiver.IsStackSlot());
3731 
3732   // Ensure the pc position is recorded immediately after the `ldr` instruction.
3733   {
3734     ExactAssemblyScope aas(GetVIXLAssembler(),
3735                            vixl32::kMaxInstructionSizeInBytes,
3736                            CodeBufferCheckScope::kMaximumSize);
3737     // /* HeapReference<Class> */ temp = receiver->klass_
3738     __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3739     codegen_->MaybeRecordImplicitNullCheck(invoke);
3740   }
3741   // Instead of simply (possibly) unpoisoning `temp` here, we should
3742   // emit a read barrier for the previous class reference load.
3743   // However this is not required in practice, as this is an
3744   // intermediate/temporary reference and because the current
3745   // concurrent copying collector keeps the from-space memory
3746   // intact/accessible until the end of the marking phase (though
3747   // future collectors may not preserve this property).
3748   GetAssembler()->MaybeUnpoisonHeapReference(temp);
3749 
3750   // If we're compiling baseline, update the inline cache.
3751   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3752 
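       // Overview of the dispatch emitted below: index the receiver class's IMT with the
       // invoke's ImtIndex(), load that ArtMethod's quick-code entry point into LR and branch
       // to it. If several interface methods collide on the same IMT slot, the entry is a
       // conflict resolution stub that uses the hidden argument (r12, set further down) to
       // locate the actual method.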
3753   GetAssembler()->LoadFromOffset(kLoadWord,
3754                                  temp,
3755                                  temp,
3756                                  mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3757 
3758   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3759       invoke->GetImtIndex(), kArmPointerSize));
3760   // temp = temp->GetImtEntryAt(method_offset);
3761   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3762   uint32_t entry_point =
3763       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3764   // LR = temp->GetEntryPoint();
3765   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3766 
3767   {
3768     // Set the hidden argument (in r12). It is done here, right before the BLX, to prevent
3769     // other instructions from clobbering it, as they might use r12 as a scratch register.
3770     Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3771     // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3772     // so it checks if the application is using them (by passing them to the macro assembler
3773     // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3774     // what is available, and is the opposite of the standard usage: Instead of requesting a
3775     // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3776     // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3777     // (to materialize the constant), since the destination register becomes available for such use
3778     // internally for the duration of the macro instruction.
3779     UseScratchRegisterScope temps(GetVIXLAssembler());
3780     temps.Exclude(RegisterFrom(hidden_reg));
3781     if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3782       Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3783       if (current_method.IsStackSlot()) {
3784         GetAssembler()->LoadFromOffset(
3785             kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3786       } else {
3787         __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3788       }
3789     } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3790       // We pass the method from the IMT in case of a conflict. This will ensure
3791       // we go into the runtime to resolve the actual method.
3792       CHECK_NE(temp.GetCode(), lr.GetCode());
3793       __ Mov(RegisterFrom(hidden_reg), temp);
3794     } else {
3795       codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3796     }
3797   }
3798   {
3799     // Ensure the pc position is recorded immediately after the `blx` instruction.
3800     // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
3801     ExactAssemblyScope aas(GetVIXLAssembler(),
3802                            vixl32::k16BitT32InstructionSizeInBytes,
3803                            CodeBufferCheckScope::kExactSize);
3804     // LR();  (i.e. call the entry point just loaded into LR)
3805     __ blx(lr);
3806     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3807     DCHECK(!codegen_->IsLeafMethod());
3808   }
3809 
3810   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3811 }
3812 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3813 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3814   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3815   if (intrinsic.TryDispatch(invoke)) {
3816     return;
3817   }
3818   HandleInvoke(invoke);
3819 }
3820 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3821 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3822   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3823     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3824     return;
3825   }
3826   codegen_->GenerateInvokePolymorphicCall(invoke);
3827   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3828 }
3829 
VisitInvokeCustom(HInvokeCustom * invoke)3830 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3831   HandleInvoke(invoke);
3832 }
3833 
VisitInvokeCustom(HInvokeCustom * invoke)3834 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3835   codegen_->GenerateInvokeCustomCall(invoke);
3836   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3837 }
3838 
VisitNeg(HNeg * neg)3839 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3840   LocationSummary* locations =
3841       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3842   switch (neg->GetResultType()) {
3843     case DataType::Type::kInt32: {
3844       locations->SetInAt(0, Location::RequiresRegister());
3845       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3846       break;
3847     }
3848     case DataType::Type::kInt64: {
3849       locations->SetInAt(0, Location::RequiresRegister());
3850       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3851       break;
3852     }
3853 
3854     case DataType::Type::kFloat32:
3855     case DataType::Type::kFloat64:
3856       locations->SetInAt(0, Location::RequiresFpuRegister());
3857       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3858       break;
3859 
3860     default:
3861       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3862   }
3863 }
3864 
VisitNeg(HNeg * neg)3865 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3866   LocationSummary* locations = neg->GetLocations();
3867   Location out = locations->Out();
3868   Location in = locations->InAt(0);
3869   switch (neg->GetResultType()) {
3870     case DataType::Type::kInt32:
3871       __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3872       break;
3873 
3874     case DataType::Type::kInt64:
3875       // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3876       __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3877       // We cannot emit an RSC (Reverse Subtract with Carry)
3878       // instruction here, as it does not exist in the Thumb-2
3879       // instruction set. Instead, we achieve the same effect
3880       // with the SBC and SUB instructions below.
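           // For illustration only (not emitted code): with x split into (hi, lo),
           //   lo' = 0 - lo            (RSBS, sets a borrow iff lo != 0)
           //   hi' = 0 - hi - borrow   (SBC materializes -borrow, SUB then subtracts hi)
           // E.g. x = 0x0000000100000000: lo' = 0, no borrow, hi' = 0 - 1 = 0xFFFFFFFF,
           // giving 0xFFFFFFFF00000000, which is -x.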
3881       //
3882       // out.hi = -C
3883       __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3884       // out.hi = out.hi - in.hi
3885       __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
3886       break;
3887 
3888     case DataType::Type::kFloat32:
3889     case DataType::Type::kFloat64:
3890       __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3891       break;
3892 
3893     default:
3894       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3895   }
3896 }
3897 
VisitTypeConversion(HTypeConversion * conversion)3898 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3899   DataType::Type result_type = conversion->GetResultType();
3900   DataType::Type input_type = conversion->GetInputType();
3901   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3902       << input_type << " -> " << result_type;
3903 
3904   // The float-to-long, double-to-long and long-to-float type conversions
3905   // rely on a call to the runtime.
3906   LocationSummary::CallKind call_kind =
3907       (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3908         && result_type == DataType::Type::kInt64)
3909        || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3910       ? LocationSummary::kCallOnMainOnly
3911       : LocationSummary::kNoCall;
3912   LocationSummary* locations =
3913       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3914 
3915   switch (result_type) {
3916     case DataType::Type::kUint8:
3917     case DataType::Type::kInt8:
3918     case DataType::Type::kUint16:
3919     case DataType::Type::kInt16:
3920       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3921       locations->SetInAt(0, Location::RequiresRegister());
3922       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3923       break;
3924 
3925     case DataType::Type::kInt32:
3926       switch (input_type) {
3927         case DataType::Type::kInt64:
3928           locations->SetInAt(0, Location::Any());
3929           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3930           break;
3931 
3932         case DataType::Type::kFloat32:
3933           locations->SetInAt(0, Location::RequiresFpuRegister());
3934           locations->SetOut(Location::RequiresRegister());
3935           locations->AddTemp(Location::RequiresFpuRegister());
3936           break;
3937 
3938         case DataType::Type::kFloat64:
3939           locations->SetInAt(0, Location::RequiresFpuRegister());
3940           locations->SetOut(Location::RequiresRegister());
3941           locations->AddTemp(Location::RequiresFpuRegister());
3942           break;
3943 
3944         default:
3945           LOG(FATAL) << "Unexpected type conversion from " << input_type
3946                      << " to " << result_type;
3947       }
3948       break;
3949 
3950     case DataType::Type::kInt64:
3951       switch (input_type) {
3952         case DataType::Type::kBool:
3953         case DataType::Type::kUint8:
3954         case DataType::Type::kInt8:
3955         case DataType::Type::kUint16:
3956         case DataType::Type::kInt16:
3957         case DataType::Type::kInt32:
3958           locations->SetInAt(0, Location::RequiresRegister());
3959           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3960           break;
3961 
3962         case DataType::Type::kFloat32: {
3963           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3964           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3965           locations->SetOut(LocationFrom(r0, r1));
3966           break;
3967         }
3968 
3969         case DataType::Type::kFloat64: {
3970           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3971           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3972                                              calling_convention.GetFpuRegisterAt(1)));
3973           locations->SetOut(LocationFrom(r0, r1));
3974           break;
3975         }
3976 
3977         default:
3978           LOG(FATAL) << "Unexpected type conversion from " << input_type
3979                      << " to " << result_type;
3980       }
3981       break;
3982 
3983     case DataType::Type::kFloat32:
3984       switch (input_type) {
3985         case DataType::Type::kBool:
3986         case DataType::Type::kUint8:
3987         case DataType::Type::kInt8:
3988         case DataType::Type::kUint16:
3989         case DataType::Type::kInt16:
3990         case DataType::Type::kInt32:
3991           locations->SetInAt(0, Location::RequiresRegister());
3992           locations->SetOut(Location::RequiresFpuRegister());
3993           break;
3994 
3995         case DataType::Type::kInt64: {
3996           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3997           locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3998                                              calling_convention.GetRegisterAt(1)));
3999           locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4000           break;
4001         }
4002 
4003         case DataType::Type::kFloat64:
4004           locations->SetInAt(0, Location::RequiresFpuRegister());
4005           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4006           break;
4007 
4008         default:
4009           LOG(FATAL) << "Unexpected type conversion from " << input_type
4010                      << " to " << result_type;
4011       }
4012       break;
4013 
4014     case DataType::Type::kFloat64:
4015       switch (input_type) {
4016         case DataType::Type::kBool:
4017         case DataType::Type::kUint8:
4018         case DataType::Type::kInt8:
4019         case DataType::Type::kUint16:
4020         case DataType::Type::kInt16:
4021         case DataType::Type::kInt32:
4022           locations->SetInAt(0, Location::RequiresRegister());
4023           locations->SetOut(Location::RequiresFpuRegister());
4024           break;
4025 
4026         case DataType::Type::kInt64:
4027           locations->SetInAt(0, Location::RequiresRegister());
4028           locations->SetOut(Location::RequiresFpuRegister());
4029           locations->AddTemp(Location::RequiresFpuRegister());
4030           locations->AddTemp(Location::RequiresFpuRegister());
4031           break;
4032 
4033         case DataType::Type::kFloat32:
4034           locations->SetInAt(0, Location::RequiresFpuRegister());
4035           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4036           break;
4037 
4038         default:
4039           LOG(FATAL) << "Unexpected type conversion from " << input_type
4040                      << " to " << result_type;
4041       }
4042       break;
4043 
4044     default:
4045       LOG(FATAL) << "Unexpected type conversion from " << input_type
4046                  << " to " << result_type;
4047   }
4048 }
4049 
VisitTypeConversion(HTypeConversion * conversion)4050 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
4051   LocationSummary* locations = conversion->GetLocations();
4052   Location out = locations->Out();
4053   Location in = locations->InAt(0);
4054   DataType::Type result_type = conversion->GetResultType();
4055   DataType::Type input_type = conversion->GetInputType();
4056   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
4057       << input_type << " -> " << result_type;
4058   switch (result_type) {
4059     case DataType::Type::kUint8:
4060       switch (input_type) {
4061         case DataType::Type::kInt8:
4062         case DataType::Type::kUint16:
4063         case DataType::Type::kInt16:
4064         case DataType::Type::kInt32:
4065           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4066           break;
4067         case DataType::Type::kInt64:
4068           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4069           break;
4070 
4071         default:
4072           LOG(FATAL) << "Unexpected type conversion from " << input_type
4073                      << " to " << result_type;
4074       }
4075       break;
4076 
4077     case DataType::Type::kInt8:
4078       switch (input_type) {
4079         case DataType::Type::kUint8:
4080         case DataType::Type::kUint16:
4081         case DataType::Type::kInt16:
4082         case DataType::Type::kInt32:
4083           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
4084           break;
4085         case DataType::Type::kInt64:
4086           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
4087           break;
4088 
4089         default:
4090           LOG(FATAL) << "Unexpected type conversion from " << input_type
4091                      << " to " << result_type;
4092       }
4093       break;
4094 
4095     case DataType::Type::kUint16:
4096       switch (input_type) {
4097         case DataType::Type::kInt8:
4098         case DataType::Type::kInt16:
4099         case DataType::Type::kInt32:
4100           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4101           break;
4102         case DataType::Type::kInt64:
4103           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4104           break;
4105 
4106         default:
4107           LOG(FATAL) << "Unexpected type conversion from " << input_type
4108                      << " to " << result_type;
4109       }
4110       break;
4111 
4112     case DataType::Type::kInt16:
4113       switch (input_type) {
4114         case DataType::Type::kUint16:
4115         case DataType::Type::kInt32:
4116           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
4117           break;
4118         case DataType::Type::kInt64:
4119           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
4120           break;
4121 
4122         default:
4123           LOG(FATAL) << "Unexpected type conversion from " << input_type
4124                      << " to " << result_type;
4125       }
4126       break;
4127 
4128     case DataType::Type::kInt32:
4129       switch (input_type) {
4130         case DataType::Type::kInt64:
4131           DCHECK(out.IsRegister());
4132           if (in.IsRegisterPair()) {
4133             __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
4134           } else if (in.IsDoubleStackSlot()) {
4135             GetAssembler()->LoadFromOffset(kLoadWord,
4136                                            OutputRegister(conversion),
4137                                            sp,
4138                                            in.GetStackIndex());
4139           } else {
4140             DCHECK(in.IsConstant());
4141             DCHECK(in.GetConstant()->IsLongConstant());
4142             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
4143             __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
4144           }
4145           break;
4146 
4147         case DataType::Type::kFloat32: {
4148           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
4149           __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
4150           __ Vmov(OutputRegister(conversion), temp);
4151           break;
4152         }
4153 
4154         case DataType::Type::kFloat64: {
4155           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4156           __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
4157           __ Vmov(OutputRegister(conversion), temp_s);
4158           break;
4159         }
4160 
4161         default:
4162           LOG(FATAL) << "Unexpected type conversion from " << input_type
4163                      << " to " << result_type;
4164       }
4165       break;
4166 
4167     case DataType::Type::kInt64:
4168       switch (input_type) {
4169         case DataType::Type::kBool:
4170         case DataType::Type::kUint8:
4171         case DataType::Type::kInt8:
4172         case DataType::Type::kUint16:
4173         case DataType::Type::kInt16:
4174         case DataType::Type::kInt32:
4175           DCHECK(out.IsRegisterPair());
4176           DCHECK(in.IsRegister());
4177           __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
4178           // Sign extension.
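               // (ASR #31 replicates the sign bit into the high word; e.g. -5 = 0xFFFFFFFB
               // extends to 0xFFFFFFFFFFFFFFFB, while 5 extends to 0x0000000000000005.)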
4179           __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
4180           break;
4181 
4182         case DataType::Type::kFloat32:
4183           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
4184           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
4185           break;
4186 
4187         case DataType::Type::kFloat64:
4188           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
4189           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
4190           break;
4191 
4192         default:
4193           LOG(FATAL) << "Unexpected type conversion from " << input_type
4194                      << " to " << result_type;
4195       }
4196       break;
4197 
4198     case DataType::Type::kFloat32:
4199       switch (input_type) {
4200         case DataType::Type::kBool:
4201         case DataType::Type::kUint8:
4202         case DataType::Type::kInt8:
4203         case DataType::Type::kUint16:
4204         case DataType::Type::kInt16:
4205         case DataType::Type::kInt32:
4206           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
4207           __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
4208           break;
4209 
4210         case DataType::Type::kInt64:
4211           codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
4212           CheckEntrypointTypes<kQuickL2f, float, int64_t>();
4213           break;
4214 
4215         case DataType::Type::kFloat64:
4216           __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
4217           break;
4218 
4219         default:
4220           LOG(FATAL) << "Unexpected type conversion from " << input_type
4221                      << " to " << result_type;
4222       }
4223       break;
4224 
4225     case DataType::Type::kFloat64:
4226       switch (input_type) {
4227         case DataType::Type::kBool:
4228         case DataType::Type::kUint8:
4229         case DataType::Type::kInt8:
4230         case DataType::Type::kUint16:
4231         case DataType::Type::kInt16:
4232         case DataType::Type::kInt32:
4233           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4234           __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4235           break;
4236 
4237         case DataType::Type::kInt64: {
4238           vixl32::Register low = LowRegisterFrom(in);
4239           vixl32::Register high = HighRegisterFrom(in);
4240           vixl32::SRegister out_s = LowSRegisterFrom(out);
4241           vixl32::DRegister out_d = DRegisterFrom(out);
4242           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4243           vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4244           vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4245 
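               // A sketch of the math performed below:
               //   (double) in == (double) (int32_t) high * 2^32 + (double) (uint32_t) low
               // e.g. in == -1 (high == 0xFFFFFFFF, low == 0xFFFFFFFF):
               //   -1.0 * 2^32 + 4294967295.0 == -1.0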
4246           // temp_d = int-to-double(high)
4247           __ Vmov(temp_s, high);
4248           __ Vcvt(F64, S32, temp_d, temp_s);
4249           // constant_d = k2Pow32EncodingForDouble
4250           __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4251           // out_d = unsigned-to-double(low)
4252           __ Vmov(out_s, low);
4253           __ Vcvt(F64, U32, out_d, out_s);
4254           // out_d += temp_d * constant_d
4255           __ Vmla(F64, out_d, temp_d, constant_d);
4256           break;
4257         }
4258 
4259         case DataType::Type::kFloat32:
4260           __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4261           break;
4262 
4263         default:
4264           LOG(FATAL) << "Unexpected type conversion from " << input_type
4265                      << " to " << result_type;
4266       }
4267       break;
4268 
4269     default:
4270       LOG(FATAL) << "Unexpected type conversion from " << input_type
4271                  << " to " << result_type;
4272   }
4273 }
4274 
VisitAdd(HAdd * add)4275 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4276   LocationSummary* locations =
4277       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4278   switch (add->GetResultType()) {
4279     case DataType::Type::kInt32: {
4280       locations->SetInAt(0, Location::RequiresRegister());
4281       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4282       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4283       break;
4284     }
4285 
4286     case DataType::Type::kInt64: {
4287       locations->SetInAt(0, Location::RequiresRegister());
4288       locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4289       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4290       break;
4291     }
4292 
4293     case DataType::Type::kFloat32:
4294     case DataType::Type::kFloat64: {
4295       locations->SetInAt(0, Location::RequiresFpuRegister());
4296       locations->SetInAt(1, Location::RequiresFpuRegister());
4297       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4298       break;
4299     }
4300 
4301     default:
4302       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4303   }
4304 }
4305 
VisitAdd(HAdd * add)4306 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4307   LocationSummary* locations = add->GetLocations();
4308   Location out = locations->Out();
4309   Location first = locations->InAt(0);
4310   Location second = locations->InAt(1);
4311 
4312   switch (add->GetResultType()) {
4313     case DataType::Type::kInt32: {
4314       __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4315       break;
4316     }
4317 
4318     case DataType::Type::kInt64: {
4319       if (second.IsConstant()) {
4320         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4321         GenerateAddLongConst(out, first, value);
4322       } else {
4323         DCHECK(second.IsRegisterPair());
4324         __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4325         __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4326       }
4327       break;
4328     }
4329 
4330     case DataType::Type::kFloat32:
4331     case DataType::Type::kFloat64:
4332       __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4333       break;
4334 
4335     default:
4336       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4337   }
4338 }
4339 
VisitSub(HSub * sub)4340 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4341   LocationSummary* locations =
4342       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4343   switch (sub->GetResultType()) {
4344     case DataType::Type::kInt32: {
4345       locations->SetInAt(0, Location::RequiresRegister());
4346       locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4347       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4348       break;
4349     }
4350 
4351     case DataType::Type::kInt64: {
4352       locations->SetInAt(0, Location::RequiresRegister());
4353       locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4354       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4355       break;
4356     }
4357     case DataType::Type::kFloat32:
4358     case DataType::Type::kFloat64: {
4359       locations->SetInAt(0, Location::RequiresFpuRegister());
4360       locations->SetInAt(1, Location::RequiresFpuRegister());
4361       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4362       break;
4363     }
4364     default:
4365       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4366   }
4367 }
4368 
VisitSub(HSub * sub)4369 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4370   LocationSummary* locations = sub->GetLocations();
4371   Location out = locations->Out();
4372   Location first = locations->InAt(0);
4373   Location second = locations->InAt(1);
4374   switch (sub->GetResultType()) {
4375     case DataType::Type::kInt32: {
4376       __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4377       break;
4378     }
4379 
4380     case DataType::Type::kInt64: {
4381       if (second.IsConstant()) {
4382         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4383         GenerateAddLongConst(out, first, -value);
4384       } else {
4385         DCHECK(second.IsRegisterPair());
4386         __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4387         __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4388       }
4389       break;
4390     }
4391 
4392     case DataType::Type::kFloat32:
4393     case DataType::Type::kFloat64:
4394       __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4395       break;
4396 
4397     default:
4398       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4399   }
4400 }
4401 
VisitMul(HMul * mul)4402 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4403   LocationSummary* locations =
4404       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4405   switch (mul->GetResultType()) {
4406     case DataType::Type::kInt32:
4407     case DataType::Type::kInt64: {
4408       locations->SetInAt(0, Location::RequiresRegister());
4409       locations->SetInAt(1, Location::RequiresRegister());
4410       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4411       break;
4412     }
4413 
4414     case DataType::Type::kFloat32:
4415     case DataType::Type::kFloat64: {
4416       locations->SetInAt(0, Location::RequiresFpuRegister());
4417       locations->SetInAt(1, Location::RequiresFpuRegister());
4418       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4419       break;
4420     }
4421 
4422     default:
4423       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4424   }
4425 }
4426 
VisitMul(HMul * mul)4427 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4428   LocationSummary* locations = mul->GetLocations();
4429   Location out = locations->Out();
4430   Location first = locations->InAt(0);
4431   Location second = locations->InAt(1);
4432   switch (mul->GetResultType()) {
4433     case DataType::Type::kInt32: {
4434       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4435       break;
4436     }
4437     case DataType::Type::kInt64: {
4438       vixl32::Register out_hi = HighRegisterFrom(out);
4439       vixl32::Register out_lo = LowRegisterFrom(out);
4440       vixl32::Register in1_hi = HighRegisterFrom(first);
4441       vixl32::Register in1_lo = LowRegisterFrom(first);
4442       vixl32::Register in2_hi = HighRegisterFrom(second);
4443       vixl32::Register in2_lo = LowRegisterFrom(second);
4444 
4445       // Extra checks to protect against issues caused by the existence of the R1_R2 pair.
4446       // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4447       // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4448       DCHECK(!out_hi.Is(in1_lo));
4449       DCHECK(!out_hi.Is(in2_lo));
4450 
4451       // input: in1 - 64 bits, in2 - 64 bits
4452       // output: out
4453       // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4454       // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4455       // parts: out.lo = (in1.lo * in2.lo)[31:0]
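           // For illustration: in1 = 2^32 + 3, in2 = 2^32 + 5. The full product is
           // 2^64 + 8 * 2^32 + 15; truncated to 64 bits that is hi = 8, lo = 15, which matches
           // hi = 3 * 1 + 1 * 5 + 0 (no bits above 31 in in1.lo * in2.lo) and lo = 3 * 5.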
4456 
4457       UseScratchRegisterScope temps(GetVIXLAssembler());
4458       vixl32::Register temp = temps.Acquire();
4459       // temp <- in1.lo * in2.hi
4460       __ Mul(temp, in1_lo, in2_hi);
4461       // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4462       __ Mla(out_hi, in1_hi, in2_lo, temp);
4463       // out.lo <- (in1.lo * in2.lo)[31:0];
4464       __ Umull(out_lo, temp, in1_lo, in2_lo);
4465       // out.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4466       __ Add(out_hi, out_hi, temp);
4467       break;
4468     }
4469 
4470     case DataType::Type::kFloat32:
4471     case DataType::Type::kFloat64:
4472       __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4473       break;
4474 
4475     default:
4476       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4477   }
4478 }
4479 
DivRemOneOrMinusOne(HBinaryOperation * instruction)4480 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4481   DCHECK(instruction->IsDiv() || instruction->IsRem());
4482   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4483 
4484   Location second = instruction->GetLocations()->InAt(1);
4485   DCHECK(second.IsConstant());
4486 
4487   vixl32::Register out = OutputRegister(instruction);
4488   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4489   int32_t imm = Int32ConstantFrom(second);
4490   DCHECK(imm == 1 || imm == -1);
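       // For these divisors no arithmetic is needed: x % ±1 == 0, x / 1 == x and
       // x / -1 == -x (computed below as RSB, i.e. 0 - x).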
4491 
4492   if (instruction->IsRem()) {
4493     __ Mov(out, 0);
4494   } else {
4495     if (imm == 1) {
4496       __ Mov(out, dividend);
4497     } else {
4498       __ Rsb(out, dividend, 0);
4499     }
4500   }
4501 }
4502 
DivRemByPowerOfTwo(HBinaryOperation * instruction)4503 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4504   DCHECK(instruction->IsDiv() || instruction->IsRem());
4505   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4506 
4507   LocationSummary* locations = instruction->GetLocations();
4508   Location second = locations->InAt(1);
4509   DCHECK(second.IsConstant());
4510 
4511   vixl32::Register out = OutputRegister(instruction);
4512   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4513   int32_t imm = Int32ConstantFrom(second);
4514   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4515   int ctz_imm = CTZ(abs_imm);
4516 
4517   auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4518     __ Asr(out, in, ctz_imm);
4519     if (imm < 0) {
4520       __ Rsb(out, out, 0);
4521     }
4522   };
4523 
4524   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4525     // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4526     // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4527     //   imm == 2
4528     //     HDiv
4529     //      add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4530     //      asr out, out(0x80000001), #1 => out = 0xc0000000
4531     //      This is the same as 'asr out, dividend(0x80000000), #1'
4532     //
4533     //   imm > 2
4534     //     HDiv
4535     //      asr out, dividend(0x80000000), #31 => out = -1
4536     //      add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4537     //          where the number of the rightmost 1s is ctz_imm.
4538     //      asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4539     //          leftmost 1s is ctz_imm + 1.
4540     //      This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4541     //
4542     //   imm == INT32_MIN
4543     //     HDiv
4544     //      asr out, dividend(0x80000000), #31 => out = -1
4545     //      add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xffffffff
4546     //      asr out, out(0xffffffff), #31 => out = -1
4547     //      rsb out, out(-1), #0 => out = 1
4548     //      This is the same as
4549     //        asr out, dividend(0x80000000), #31
4550     //        rsb out, out, #0
4551     //
4552     //
4553     //   INT_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only
4554     //   with bits 0..30 of a dividend. For INT32_MIN those bits are zeros, so 'and' and
4555     //   'ubfx' always produce zero.
4556     if (instruction->IsDiv()) {
4557       generate_div_code(out, dividend);
4558     } else {
4559       if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4560         __ And(out, dividend, abs_imm - 1);
4561       } else {
4562         __ Ubfx(out, dividend, 0, ctz_imm);
4563       }
4564       return;
4565     }
4566   } else {
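         // Signed division by 2^n must round towards zero, but ASR alone rounds towards
         // negative infinity, so a negative dividend first gets (abs_imm - 1) added to it.
         // That bias is produced by a logical right shift by (32 - ctz_imm) of either the
         // dividend itself (ctz_imm == 1) or its ASR #31 sign extension, giving 0 or abs_imm - 1.
         // E.g. imm = 4, dividend = -7: -7 + 3 = -4, then ASR #2 = -1 == -7 / 4 (towards zero).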
4567     vixl32::Register add_right_input = dividend;
4568     if (ctz_imm > 1) {
4569       __ Asr(out, dividend, 31);
4570       add_right_input = out;
4571     }
4572     __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4573 
4574     if (instruction->IsDiv()) {
4575       generate_div_code(out, out);
4576     } else {
4577       __ Bfc(out, 0, ctz_imm);
4578       __ Sub(out, dividend, out);
4579     }
4580   }
4581 }
4582 
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4583 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4584   DCHECK(instruction->IsDiv() || instruction->IsRem());
4585   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4586 
4587   LocationSummary* locations = instruction->GetLocations();
4588   Location second = locations->InAt(1);
4589   DCHECK(second.IsConstant());
4590 
4591   vixl32::Register out = OutputRegister(instruction);
4592   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4593   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4594   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4595   int32_t imm = Int32ConstantFrom(second);
4596 
4597   int64_t magic;
4598   int shift;
4599   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
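       // A sketch of the underlying technique ("magic number" division, cf. Hacker's Delight):
       // for a suitable (magic, shift) pair, n / imm == high32(n * magic) >> shift, possibly
       // with the small corrections applied below. E.g. one valid pair for imm == 5 is
       // magic == 0x66666667, shift == 1: (7 * 0x66666667) >> 33 == 1 == 7 / 5.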
4600 
4601   auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4602                                                         vixl32::Register dividend,
4603                                                         vixl32::Register temp1,
4604                                                         vixl32::Register temp2) {
4605     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4606     __ Mov(temp1, static_cast<int32_t>(magic));
4607     if (magic > 0 && shift == 0) {
4608       __ Smull(temp2, out, dividend, temp1);
4609     } else {
4610       __ Smull(temp2, temp1, dividend, temp1);
4611       if (magic < 0) {
4612         // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4613         // than INT32_MAX. In such a case shift is never 0.
4614         // Proof:
4615         //   m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4616         //
4617         //   If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4618         //   = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4619         //   = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4620         //
4621         //   1 + (2^32 / d) is decreasing when d is increasing.
4622         //   The maximum is 1 431 655 766, when d == 3; this value is less than INT32_MAX.
4623         //   The minimum is 3, when d == 2^31 - 1.
4624         //   So for all values of d in [3, INT32_MAX], m computed with p == 32 is in
4625         //   [3, INT32_MAX) and is never less than 0.
4626         __ Add(temp1, temp1, dividend);
4627       }
4628       DCHECK_NE(shift, 0);
4629       __ Lsr(out, temp1, shift);
4630     }
4631   };
4632 
4633   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4634     // No need to adjust the result for a non-negative dividend and a positive divisor.
4635     if (instruction->IsDiv()) {
4636       generate_unsigned_div_code(out, dividend, temp1, temp2);
4637     } else {
4638       generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4639       __ Mov(temp2, imm);
4640       __ Mls(out, temp1, temp2, dividend);
4641     }
4642   } else {
4643     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4644     __ Mov(temp1, static_cast<int32_t>(magic));
4645     __ Smull(temp2, temp1, dividend, temp1);
4646 
4647     if (imm > 0 && magic < 0) {
4648       __ Add(temp1, temp1, dividend);
4649     } else if (imm < 0 && magic > 0) {
4650       __ Sub(temp1, temp1, dividend);
4651     }
4652 
4653     if (shift != 0) {
4654       __ Asr(temp1, temp1, shift);
4655     }
4656 
4657     if (instruction->IsDiv()) {
4658       __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4659     } else {
4660       __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4661       // TODO: Strength reduction for mls.
4662       __ Mov(temp2, imm);
4663       __ Mls(out, temp1, temp2, dividend);
4664     }
4665   }
4666 }
4667 
GenerateDivRemConstantIntegral(HBinaryOperation * instruction)4668 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4669     HBinaryOperation* instruction) {
4670   DCHECK(instruction->IsDiv() || instruction->IsRem());
4671   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4672 
4673   Location second = instruction->GetLocations()->InAt(1);
4674   DCHECK(second.IsConstant());
4675 
4676   int32_t imm = Int32ConstantFrom(second);
4677   if (imm == 0) {
4678     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4679   } else if (imm == 1 || imm == -1) {
4680     DivRemOneOrMinusOne(instruction);
4681   } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4682     DivRemByPowerOfTwo(instruction);
4683   } else {
4684     DCHECK(imm <= -2 || imm >= 2);
4685     GenerateDivRemWithAnyConstant(instruction);
4686   }
4687 }
4688 
VisitDiv(HDiv * div)4689 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4690   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4691   if (div->GetResultType() == DataType::Type::kInt64) {
4692     // pLdiv runtime call.
4693     call_kind = LocationSummary::kCallOnMainOnly;
4694   } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4695     // sdiv will be replaced by other instruction sequence.
4696   } else if (div->GetResultType() == DataType::Type::kInt32 &&
4697              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4698     // pIdivmod runtime call.
4699     call_kind = LocationSummary::kCallOnMainOnly;
4700   }
4701 
4702   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4703 
4704   switch (div->GetResultType()) {
4705     case DataType::Type::kInt32: {
4706       HInstruction* divisor = div->InputAt(1);
4707       if (divisor->IsConstant()) {
4708         locations->SetInAt(0, Location::RequiresRegister());
4709         locations->SetInAt(1, Location::ConstantLocation(divisor));
4710         int32_t value = Int32ConstantFrom(divisor);
4711         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4712         if (value == 1 || value == 0 || value == -1) {
4713           // No temp register required.
4714         } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4715                    value != 2 &&
4716                    value != -2 &&
4717                    !HasNonNegativeOrMinIntInputAt(div, 0)) {
4718           // The "out" register is used as a temporary, so it overlaps with the inputs.
4719           out_overlaps = Location::kOutputOverlap;
4720         } else {
4721           locations->AddRegisterTemps(2);
4722         }
4723         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4724       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4725         locations->SetInAt(0, Location::RequiresRegister());
4726         locations->SetInAt(1, Location::RequiresRegister());
4727         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4728       } else {
4729         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4730         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4731         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4732         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4733         //       we only need the former.
4734         locations->SetOut(LocationFrom(r0));
4735       }
4736       break;
4737     }
4738     case DataType::Type::kInt64: {
4739       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4740       locations->SetInAt(0, LocationFrom(
4741           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4742       locations->SetInAt(1, LocationFrom(
4743           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4744       locations->SetOut(LocationFrom(r0, r1));
4745       break;
4746     }
4747     case DataType::Type::kFloat32:
4748     case DataType::Type::kFloat64: {
4749       locations->SetInAt(0, Location::RequiresFpuRegister());
4750       locations->SetInAt(1, Location::RequiresFpuRegister());
4751       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4752       break;
4753     }
4754 
4755     default:
4756       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4757   }
4758 }
4759 
VisitDiv(HDiv * div)4760 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4761   Location lhs = div->GetLocations()->InAt(0);
4762   Location rhs = div->GetLocations()->InAt(1);
4763 
4764   switch (div->GetResultType()) {
4765     case DataType::Type::kInt32: {
4766       if (rhs.IsConstant()) {
4767         GenerateDivRemConstantIntegral(div);
4768       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4769         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4770       } else {
4771         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4772         DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4773         DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4774         DCHECK(r0.Is(OutputRegister(div)));
4775 
4776         codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4777         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4778       }
4779       break;
4780     }
4781 
4782     case DataType::Type::kInt64: {
4783       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4784       DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4785       DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4786       DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4787       DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4788       DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4789       DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4790 
4791       codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4792       CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4793       break;
4794     }
4795 
4796     case DataType::Type::kFloat32:
4797     case DataType::Type::kFloat64:
4798       __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4799       break;
4800 
4801     default:
4802       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4803   }
4804 }
4805 
VisitRem(HRem * rem)4806 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4807   DataType::Type type = rem->GetResultType();
4808 
4809   // Most remainders are implemented in the runtime.
4810   LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4811   if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4812     // sdiv will be replaced by other instruction sequence.
4813     call_kind = LocationSummary::kNoCall;
4814   } else if ((rem->GetResultType() == DataType::Type::kInt32)
4815              && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4816     // We have a hardware divide instruction for int, so compute the remainder inline.
4817     call_kind = LocationSummary::kNoCall;
4818   }
4819 
4820   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4821 
4822   switch (type) {
4823     case DataType::Type::kInt32: {
4824       HInstruction* divisor = rem->InputAt(1);
4825       if (divisor->IsConstant()) {
4826         locations->SetInAt(0, Location::RequiresRegister());
4827         locations->SetInAt(1, Location::ConstantLocation(divisor));
4828         int32_t value = Int32ConstantFrom(divisor);
4829         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4830         if (value == 1 || value == 0 || value == -1) {
4831           // No temp register required.
4832         } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4833           // The "out" register is used as a temporary, so it overlaps with the inputs.
4834           out_overlaps = Location::kOutputOverlap;
4835         } else {
4836           locations->AddRegisterTemps(2);
4837         }
4838         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4839       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4840         locations->SetInAt(0, Location::RequiresRegister());
4841         locations->SetInAt(1, Location::RequiresRegister());
4842         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4843         locations->AddTemp(Location::RequiresRegister());
4844       } else {
4845         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4846         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4847         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4848         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4849         //       we only need the latter.
4850         locations->SetOut(LocationFrom(r1));
4851       }
4852       break;
4853     }
4854     case DataType::Type::kInt64: {
4855       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4856       locations->SetInAt(0, LocationFrom(
4857           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4858       locations->SetInAt(1, LocationFrom(
4859           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4860       // The runtime helper puts the output in R2,R3.
4861       locations->SetOut(LocationFrom(r2, r3));
4862       break;
4863     }
4864     case DataType::Type::kFloat32: {
4865       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4866       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4867       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4868       locations->SetOut(LocationFrom(s0));
4869       break;
4870     }
4871 
4872     case DataType::Type::kFloat64: {
4873       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4874       locations->SetInAt(0, LocationFrom(
4875           calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4876       locations->SetInAt(1, LocationFrom(
4877           calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4878       locations->SetOut(LocationFrom(s0, s1));
4879       break;
4880     }
4881 
4882     default:
4883       LOG(FATAL) << "Unexpected rem type " << type;
4884   }
4885 }
4886 
VisitRem(HRem * rem)4887 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4888   LocationSummary* locations = rem->GetLocations();
4889   Location second = locations->InAt(1);
4890 
4891   DataType::Type type = rem->GetResultType();
4892   switch (type) {
4893     case DataType::Type::kInt32: {
4894       vixl32::Register reg1 = InputRegisterAt(rem, 0);
4895       vixl32::Register out_reg = OutputRegister(rem);
4896       if (second.IsConstant()) {
4897         GenerateDivRemConstantIntegral(rem);
4898       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4899         vixl32::Register reg2 = RegisterFrom(second);
4900         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4901 
4902         // temp = reg1 / reg2  (integer division)
4903         // dest = reg1 - temp * reg2
4904         __ Sdiv(temp, reg1, reg2);
4905         __ Mls(out_reg, temp, reg2, reg1);
4906       } else {
4907         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4908         DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4909         DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4910         DCHECK(out_reg.Is(r1));
4911 
4912         codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4913         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4914       }
4915       break;
4916     }
4917 
4918     case DataType::Type::kInt64: {
4919       codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4920       CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4921       break;
4922     }
4923 
4924     case DataType::Type::kFloat32: {
4925       codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4926       CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4927       break;
4928     }
4929 
4930     case DataType::Type::kFloat64: {
4931       codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4932       CheckEntrypointTypes<kQuickFmod, double, double, double>();
4933       break;
4934     }
4935 
4936     default:
4937       LOG(FATAL) << "Unexpected rem type " << type;
4938   }
4939 }
4940 
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4941 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4942   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4943   switch (minmax->GetResultType()) {
4944     case DataType::Type::kInt32:
4945       locations->SetInAt(0, Location::RequiresRegister());
4946       locations->SetInAt(1, Location::RequiresRegister());
4947       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4948       break;
4949     case DataType::Type::kInt64:
4950       locations->SetInAt(0, Location::RequiresRegister());
4951       locations->SetInAt(1, Location::RequiresRegister());
4952       locations->SetOut(Location::SameAsFirstInput());
4953       break;
4954     case DataType::Type::kFloat32:
4955       locations->SetInAt(0, Location::RequiresFpuRegister());
4956       locations->SetInAt(1, Location::RequiresFpuRegister());
4957       locations->SetOut(Location::SameAsFirstInput());
4958       locations->AddTemp(Location::RequiresRegister());
4959       break;
4960     case DataType::Type::kFloat64:
4961       locations->SetInAt(0, Location::RequiresFpuRegister());
4962       locations->SetInAt(1, Location::RequiresFpuRegister());
4963       locations->SetOut(Location::SameAsFirstInput());
4964       break;
4965     default:
4966       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4967   }
4968 }
4969 
4970 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4971   Location op1_loc = locations->InAt(0);
4972   Location op2_loc = locations->InAt(1);
4973   Location out_loc = locations->Out();
4974 
4975   vixl32::Register op1 = RegisterFrom(op1_loc);
4976   vixl32::Register op2 = RegisterFrom(op2_loc);
4977   vixl32::Register out = RegisterFrom(out_loc);
4978 
4979   __ Cmp(op1, op2);
4980 
4981   {
4982     ExactAssemblyScope aas(GetVIXLAssembler(),
4983                            3 * kMaxInstructionSizeInBytes,
4984                            CodeBufferCheckScope::kMaximumSize);
4985 
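    // Predicated select: for min, out = (op1 < op2) ? op1 : op2;
    // for max, out = (op1 > op2) ? op1 : op2.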
4986     __ ite(is_min ? lt : gt);
4987     __ mov(is_min ? lt : gt, out, op1);
4988     __ mov(is_min ? ge : le, out, op2);
4989   }
4990 }
4991 
4992 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4993   Location op1_loc = locations->InAt(0);
4994   Location op2_loc = locations->InAt(1);
4995   Location out_loc = locations->Out();
4996 
4997   // Optimization: don't generate any code if inputs are the same.
4998   if (op1_loc.Equals(op2_loc)) {
4999     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5000     return;
5001   }
5002 
5003   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
5004   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
5005   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
5006   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
5007   vixl32::Register out_lo = LowRegisterFrom(out_loc);
5008   vixl32::Register out_hi = HighRegisterFrom(out_loc);
5009   UseScratchRegisterScope temps(GetVIXLAssembler());
5010   const vixl32::Register temp = temps.Acquire();
5011 
5012   DCHECK(op1_lo.Is(out_lo));
5013   DCHECK(op1_hi.Is(out_hi));
5014 
5015   // Compare op1 >= op2, or op1 < op2.
5016   __ Cmp(out_lo, op2_lo);
5017   __ Sbcs(temp, out_hi, op2_hi);
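  // The subtraction result in `temp` is discarded; the SBCS only extends the low-word
  // comparison into a full 64-bit signed comparison through the carry and overflow flags.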
5018 
5019   // Now GE/LT condition code is correct for the long comparison.
5020   {
5021     vixl32::ConditionType cond = is_min ? ge : lt;
5022     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5023                                 3 * kMaxInstructionSizeInBytes,
5024                                 CodeBufferCheckScope::kMaximumSize);
5025     __ itt(cond);
5026     __ mov(cond, out_lo, op2_lo);
5027     __ mov(cond, out_hi, op2_hi);
5028   }
5029 }
5030 
5031 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
5032   LocationSummary* locations = minmax->GetLocations();
5033   Location op1_loc = locations->InAt(0);
5034   Location op2_loc = locations->InAt(1);
5035   Location out_loc = locations->Out();
5036 
5037   // Optimization: don't generate any code if inputs are the same.
5038   if (op1_loc.Equals(op2_loc)) {
5039     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5040     return;
5041   }
5042 
5043   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
5044   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
5045   vixl32::SRegister out = SRegisterFrom(out_loc);
5046 
5047   UseScratchRegisterScope temps(GetVIXLAssembler());
5048   const vixl32::Register temp1 = temps.Acquire();
5049   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
5050   vixl32::Label nan, done;
5051   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5052 
5053   DCHECK(op1.Is(out));
5054 
5055   __ Vcmp(op1, op2);
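  // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).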
5056   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5057   __ B(vs, &nan, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5058 
5059   // op1 <> op2
5060   vixl32::ConditionType cond = is_min ? gt : lt;
5061   {
5062     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5063                                 2 * kMaxInstructionSizeInBytes,
5064                                 CodeBufferCheckScope::kMaximumSize);
5065     __ it(cond);
5066     __ vmov(cond, F32, out, op2);
5067   }
5068   // For <> (not equal), the min/max calculation is done; skip the equal-inputs handling below.
5069   __ B(ne, final_label, /* is_far_target= */ false);
5070 
5071   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
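  // When the inputs compare equal they may still be +0.0 and -0.0, which differ only in
  // the sign bit: OR-ing the raw bits yields -0.0 for min, AND-ing yields +0.0 for max.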
5072   __ Vmov(temp1, op1);
5073   __ Vmov(temp2, op2);
5074   if (is_min) {
5075     __ Orr(temp1, temp1, temp2);
5076   } else {
5077     __ And(temp1, temp1, temp2);
5078   }
5079   __ Vmov(out, temp1);
5080   __ B(final_label);
5081 
5082   // handle NaN input.
5083   __ Bind(&nan);
5084   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
5085   __ Vmov(out, temp1);
5086 
5087   if (done.IsReferenced()) {
5088     __ Bind(&done);
5089   }
5090 }
5091 
5092 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
5093   LocationSummary* locations = minmax->GetLocations();
5094   Location op1_loc = locations->InAt(0);
5095   Location op2_loc = locations->InAt(1);
5096   Location out_loc = locations->Out();
5097 
5098   // Optimization: don't generate any code if inputs are the same.
5099   if (op1_loc.Equals(op2_loc)) {
5100     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
5101     return;
5102   }
5103 
5104   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
5105   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
5106   vixl32::DRegister out = DRegisterFrom(out_loc);
5107   vixl32::Label handle_nan_eq, done;
5108   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
5109 
5110   DCHECK(op1.Is(out));
5111 
5112   __ Vcmp(op1, op2);
5113   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5114   __ B(vs, &handle_nan_eq, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
5115 
5116   // op1 <> op2
5117   vixl32::ConditionType cond = is_min ? gt : lt;
5118   {
5119     ExactAssemblyScope it_scope(GetVIXLAssembler(),
5120                                 2 * kMaxInstructionSizeInBytes,
5121                                 CodeBufferCheckScope::kMaximumSize);
5122     __ it(cond);
5123     __ vmov(cond, F64, out, op2);
5124   }
5125   // For <> (not equal), the min/max calculation is done; skip the equal-inputs handling below.
5126   __ B(ne, final_label, /* is_far_target= */ false);
5127 
5128   // handle op1 == op2, max(+0.0,-0.0).
5129   if (!is_min) {
5130     __ Vand(F64, out, op1, op2);
5131     __ B(final_label);
5132   }
5133 
5134   // handle op1 == op2, min(+0.0,-0.0), NaN input.
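  // OR-ing the raw bit patterns keeps the sign bit for min(+0.0, -0.0) and produces a NaN
  // whenever either input is a NaN.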
5135   __ Bind(&handle_nan_eq);
5136   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
5137 
5138   if (done.IsReferenced()) {
5139     __ Bind(&done);
5140   }
5141 }
5142 
5143 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
5144   DataType::Type type = minmax->GetResultType();
5145   switch (type) {
5146     case DataType::Type::kInt32:
5147       GenerateMinMaxInt(minmax->GetLocations(), is_min);
5148       break;
5149     case DataType::Type::kInt64:
5150       GenerateMinMaxLong(minmax->GetLocations(), is_min);
5151       break;
5152     case DataType::Type::kFloat32:
5153       GenerateMinMaxFloat(minmax, is_min);
5154       break;
5155     case DataType::Type::kFloat64:
5156       GenerateMinMaxDouble(minmax, is_min);
5157       break;
5158     default:
5159       LOG(FATAL) << "Unexpected type for HMinMax " << type;
5160   }
5161 }
5162 
5163 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
5164   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
5165 }
5166 
5167 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
5168   GenerateMinMax(min, /*is_min*/ true);
5169 }
5170 
5171 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
5172   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
5173 }
5174 
5175 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
5176   GenerateMinMax(max, /*is_min*/ false);
5177 }
5178 
5179 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
5180   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5181   switch (abs->GetResultType()) {
5182     case DataType::Type::kInt32:
5183     case DataType::Type::kInt64:
5184       locations->SetInAt(0, Location::RequiresRegister());
5185       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5186       locations->AddTemp(Location::RequiresRegister());
5187       break;
5188     case DataType::Type::kFloat32:
5189     case DataType::Type::kFloat64:
5190       locations->SetInAt(0, Location::RequiresFpuRegister());
5191       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192       break;
5193     default:
5194       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5195   }
5196 }
5197 
5198 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
5199   LocationSummary* locations = abs->GetLocations();
5200   switch (abs->GetResultType()) {
5201     case DataType::Type::kInt32: {
5202       vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
5203       vixl32::Register out_reg = RegisterFrom(locations->Out());
5204       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
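      // Branchless abs: mask = in >> 31 (0 or -1), out = (in + mask) ^ mask.
      // For a negative input this computes (in - 1) ^ -1 == -in; non-negative inputs pass through.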
5205       __ Asr(mask, in_reg, 31);
5206       __ Add(out_reg, in_reg, mask);
5207       __ Eor(out_reg, out_reg, mask);
5208       break;
5209     }
5210     case DataType::Type::kInt64: {
5211       Location in = locations->InAt(0);
5212       vixl32::Register in_reg_lo = LowRegisterFrom(in);
5213       vixl32::Register in_reg_hi = HighRegisterFrom(in);
5214       Location output = locations->Out();
5215       vixl32::Register out_reg_lo = LowRegisterFrom(output);
5216       vixl32::Register out_reg_hi = HighRegisterFrom(output);
5217       DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
5218       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
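      // Same branchless abs extended to 64 bits: the carry of the low-word add is
      // propagated into the high word with ADC before both halves are XOR-ed with the mask.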
5219       __ Asr(mask, in_reg_hi, 31);
5220       __ Adds(out_reg_lo, in_reg_lo, mask);
5221       __ Adc(out_reg_hi, in_reg_hi, mask);
5222       __ Eor(out_reg_lo, out_reg_lo, mask);
5223       __ Eor(out_reg_hi, out_reg_hi, mask);
5224       break;
5225     }
5226     case DataType::Type::kFloat32:
5227     case DataType::Type::kFloat64:
5228       __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
5229       break;
5230     default:
5231       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5232   }
5233 }
5234 
5235 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5236   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5237   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5238 }
5239 
5240 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5241   DivZeroCheckSlowPathARMVIXL* slow_path =
5242       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5243   codegen_->AddSlowPath(slow_path);
5244 
5245   LocationSummary* locations = instruction->GetLocations();
5246   Location value = locations->InAt(0);
5247 
5248   switch (instruction->GetType()) {
5249     case DataType::Type::kBool:
5250     case DataType::Type::kUint8:
5251     case DataType::Type::kInt8:
5252     case DataType::Type::kUint16:
5253     case DataType::Type::kInt16:
5254     case DataType::Type::kInt32: {
5255       if (value.IsRegister()) {
5256         __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5257       } else {
5258         DCHECK(value.IsConstant()) << value;
5259         if (Int32ConstantFrom(value) == 0) {
5260           __ B(slow_path->GetEntryLabel());
5261         }
5262       }
5263       break;
5264     }
5265     case DataType::Type::kInt64: {
5266       if (value.IsRegisterPair()) {
5267         UseScratchRegisterScope temps(GetVIXLAssembler());
5268         vixl32::Register temp = temps.Acquire();
5269         __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5270         __ B(eq, slow_path->GetEntryLabel());
5271       } else {
5272         DCHECK(value.IsConstant()) << value;
5273         if (Int64ConstantFrom(value) == 0) {
5274           __ B(slow_path->GetEntryLabel());
5275         }
5276       }
5277       break;
5278     }
5279     default:
5280       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5281   }
5282 }
5283 
5284 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
5285   LocationSummary* locations = ror->GetLocations();
5286   vixl32::Register in = InputRegisterAt(ror, 0);
5287   Location rhs = locations->InAt(1);
5288   vixl32::Register out = OutputRegister(ror);
5289 
5290   if (rhs.IsConstant()) {
5291     // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5292     // so map all rotations to a positive equivalent in that range.
5293     // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
5294     uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
5295     if (rot) {
5296       // Rotate, mapping left rotations to right equivalents if necessary.
5297       // (e.g. left by 2 bits == right by 30.)
5298       __ Ror(out, in, rot);
5299     } else if (!out.Is(in)) {
5300       __ Mov(out, in);
5301     }
5302   } else {
5303     __ Ror(out, in, RegisterFrom(rhs));
5304   }
5305 }
5306 
5307 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5308 // rotates by swapping input regs (effectively rotating by the first 32 bits of
5309 // a larger rotation) or flipping direction (thus treating larger right/left
5310 // rotations as sub-word sized rotations in the other direction) as appropriate.
5311 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
5312   LocationSummary* locations = ror->GetLocations();
5313   vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5314   vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5315   Location rhs = locations->InAt(1);
5316   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5317   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5318 
5319   if (rhs.IsConstant()) {
5320     uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5321     // Map all rotations to positive equivalents on the interval [0,63].
5322     rot &= kMaxLongShiftDistance;
5323     // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
5324     // logic below to a simple pair of binary orr.
5325     // (e.g. 34 bits == in_reg swap + 2 bits right.)
5326     if (rot >= kArmBitsPerWord) {
5327       rot -= kArmBitsPerWord;
5328       std::swap(in_reg_hi, in_reg_lo);
5329     }
5330     // Rotate, or mov to out for zero or word size rotations.
5331     if (rot != 0u) {
5332       __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5333       __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5334       __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5335       __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5336     } else {
5337       __ Mov(out_reg_lo, in_reg_lo);
5338       __ Mov(out_reg_hi, in_reg_hi);
5339     }
5340   } else {
5341     vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5342     vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5343     vixl32::Label end;
5344     vixl32::Label shift_by_32_plus_shift_right;
5345     vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
5346 
5347     __ And(shift_right, RegisterFrom(rhs), 0x1F);
5348     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5349     __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5350     __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5351 
5352     // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5353     // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5354     __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5355     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5356     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5357     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5358     __ Lsr(shift_left, in_reg_hi, shift_right);
5359     __ Add(out_reg_lo, out_reg_lo, shift_left);
5360     __ B(final_label);
5361 
5362     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
5363     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5364     // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5365     __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5366     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5367     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5368     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5369     __ Lsl(shift_right, in_reg_hi, shift_left);
5370     __ Add(out_reg_lo, out_reg_lo, shift_right);
5371 
5372     if (end.IsReferenced()) {
5373       __ Bind(&end);
5374     }
5375   }
5376 }
5377 
5378 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5379   LocationSummary* locations =
5380       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5381   HInstruction* shift = ror->InputAt(1);
5382   switch (ror->GetResultType()) {
5383     case DataType::Type::kInt32: {
5384       locations->SetInAt(0, Location::RequiresRegister());
5385       locations->SetInAt(1, Location::RegisterOrConstant(shift));
5386       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5387       break;
5388     }
5389     case DataType::Type::kInt64: {
5390       locations->SetInAt(0, Location::RequiresRegister());
5391       if (shift->IsConstant()) {
5392         locations->SetInAt(1, Location::ConstantLocation(shift));
5393       } else {
5394         locations->SetInAt(1, Location::RequiresRegister());
5395         locations->AddTemp(Location::RequiresRegister());
5396         locations->AddTemp(Location::RequiresRegister());
5397       }
5398       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5399       break;
5400     }
5401     default:
5402       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5403   }
5404 }
5405 
5406 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5407   DataType::Type type = ror->GetResultType();
5408   switch (type) {
5409     case DataType::Type::kInt32: {
5410       HandleIntegerRotate(ror);
5411       break;
5412     }
5413     case DataType::Type::kInt64: {
5414       HandleLongRotate(ror);
5415       break;
5416     }
5417     default:
5418       LOG(FATAL) << "Unexpected operation type " << type;
5419       UNREACHABLE();
5420   }
5421 }
5422 
5423 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5424   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5425 
5426   LocationSummary* locations =
5427       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5428 
5429   HInstruction* shift = op->InputAt(1);
5430   switch (op->GetResultType()) {
5431     case DataType::Type::kInt32: {
5432       locations->SetInAt(0, Location::RequiresRegister());
5433       if (shift->IsConstant()) {
5434         locations->SetInAt(1, Location::ConstantLocation(shift));
5435         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5436       } else {
5437         locations->SetInAt(1, Location::RequiresRegister());
5438         // Make the output overlap, as it will be used to hold the masked
5439         // second input.
5440         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5441       }
5442       break;
5443     }
5444     case DataType::Type::kInt64: {
5445       locations->SetInAt(0, Location::RequiresRegister());
5446       if (shift->IsConstant()) {
5447         locations->SetInAt(1, Location::ConstantLocation(shift));
5448         // For simplicity, use kOutputOverlap even though we only require that low registers
5449         // don't clash with high registers which the register allocator currently guarantees.
5450         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5451       } else {
5452         locations->SetInAt(1, Location::RequiresRegister());
5453         locations->AddTemp(Location::RequiresRegister());
5454         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5455       }
5456       break;
5457     }
5458     default:
5459       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5460   }
5461 }
5462 
5463 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5464   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5465 
5466   LocationSummary* locations = op->GetLocations();
5467   Location out = locations->Out();
5468   Location first = locations->InAt(0);
5469   Location second = locations->InAt(1);
5470 
5471   DataType::Type type = op->GetResultType();
5472   switch (type) {
5473     case DataType::Type::kInt32: {
5474       vixl32::Register out_reg = OutputRegister(op);
5475       vixl32::Register first_reg = InputRegisterAt(op, 0);
5476       if (second.IsRegister()) {
5477         vixl32::Register second_reg = RegisterFrom(second);
5478         // ARM doesn't mask the shift count so we need to do it ourselves.
5479         __ And(out_reg, second_reg, kMaxIntShiftDistance);
5480         if (op->IsShl()) {
5481           __ Lsl(out_reg, first_reg, out_reg);
5482         } else if (op->IsShr()) {
5483           __ Asr(out_reg, first_reg, out_reg);
5484         } else {
5485           __ Lsr(out_reg, first_reg, out_reg);
5486         }
5487       } else {
5488         int32_t cst = Int32ConstantFrom(second);
5489         uint32_t shift_value = cst & kMaxIntShiftDistance;
5490         if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
5491           __ Mov(out_reg, first_reg);
5492         } else if (op->IsShl()) {
5493           __ Lsl(out_reg, first_reg, shift_value);
5494         } else if (op->IsShr()) {
5495           __ Asr(out_reg, first_reg, shift_value);
5496         } else {
5497           __ Lsr(out_reg, first_reg, shift_value);
5498         }
5499       }
5500       break;
5501     }
5502     case DataType::Type::kInt64: {
5503       vixl32::Register o_h = HighRegisterFrom(out);
5504       vixl32::Register o_l = LowRegisterFrom(out);
5505 
5506       vixl32::Register high = HighRegisterFrom(first);
5507       vixl32::Register low = LowRegisterFrom(first);
5508 
5509       if (second.IsRegister()) {
5510         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5511 
5512         vixl32::Register second_reg = RegisterFrom(second);
5513 
5514         if (op->IsShl()) {
5515           __ And(o_l, second_reg, kMaxLongShiftDistance);
5516           // Shift the high part
5517           __ Lsl(o_h, high, o_l);
5518           // Shift the low part and `or` the bits that overflow into the high part
5519           __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5520           __ Lsr(temp, low, temp);
5521           __ Orr(o_h, o_h, temp);
5522           // If the shift is > 32 bits, override the high part
5523           __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
5524           {
5525             ExactAssemblyScope guard(GetVIXLAssembler(),
5526                                      2 * vixl32::kMaxInstructionSizeInBytes,
5527                                      CodeBufferCheckScope::kMaximumSize);
5528             __ it(pl);
5529             __ lsl(pl, o_h, low, temp);
5530           }
5531           // Shift the low part
5532           __ Lsl(o_l, low, o_l);
5533         } else if (op->IsShr()) {
5534           __ And(o_h, second_reg, kMaxLongShiftDistance);
5535           // Shift the low part
5536           __ Lsr(o_l, low, o_h);
5537           // Shift the high part and `or` the bits that move down into the low part
5538           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5539           __ Lsl(temp, high, temp);
5540           __ Orr(o_l, o_l, temp);
5541           // If the shift is > 32 bits, override the low part
5542           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5543           {
5544             ExactAssemblyScope guard(GetVIXLAssembler(),
5545                                      2 * vixl32::kMaxInstructionSizeInBytes,
5546                                      CodeBufferCheckScope::kMaximumSize);
5547             __ it(pl);
5548             __ asr(pl, o_l, high, temp);
5549           }
5550           // Shift the high part
5551           __ Asr(o_h, high, o_h);
5552         } else {
5553           __ And(o_h, second_reg, kMaxLongShiftDistance);
5554           // Same as Shr except we use `Lsr`s and not `Asr`s
5555           __ Lsr(o_l, low, o_h);
5556           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5557           __ Lsl(temp, high, temp);
5558           __ Orr(o_l, o_l, temp);
5559           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5560           {
5561             ExactAssemblyScope guard(GetVIXLAssembler(),
5562                                      2 * vixl32::kMaxInstructionSizeInBytes,
5563                                      CodeBufferCheckScope::kMaximumSize);
5564             __ it(pl);
5565             __ lsr(pl, o_l, high, temp);
5566           }
5567           __ Lsr(o_h, high, o_h);
5568         }
5569       } else {
5570         // Register allocator doesn't create partial overlap.
5571         DCHECK(!o_l.Is(high));
5572         DCHECK(!o_h.Is(low));
5573         int32_t cst = Int32ConstantFrom(second);
5574         uint32_t shift_value = cst & kMaxLongShiftDistance;
5575         if (shift_value > 32) {
5576           if (op->IsShl()) {
5577             __ Lsl(o_h, low, shift_value - 32);
5578             __ Mov(o_l, 0);
5579           } else if (op->IsShr()) {
5580             __ Asr(o_l, high, shift_value - 32);
5581             __ Asr(o_h, high, 31);
5582           } else {
5583             __ Lsr(o_l, high, shift_value - 32);
5584             __ Mov(o_h, 0);
5585           }
5586         } else if (shift_value == 32) {
5587           if (op->IsShl()) {
5588             __ Mov(o_h, low);
5589             __ Mov(o_l, 0);
5590           } else if (op->IsShr()) {
5591             __ Mov(o_l, high);
5592             __ Asr(o_h, high, 31);
5593           } else {
5594             __ Mov(o_l, high);
5595             __ Mov(o_h, 0);
5596           }
5597         } else if (shift_value == 1) {
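          // A shift by one can go through the carry flag: the flag-setting shift of one half
          // leaves the bit crossing the word boundary in C, and ADC/RRX folds it into the other half.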
5598           if (op->IsShl()) {
5599             __ Lsls(o_l, low, 1);
5600             __ Adc(o_h, high, high);
5601           } else if (op->IsShr()) {
5602             __ Asrs(o_h, high, 1);
5603             __ Rrx(o_l, low);
5604           } else {
5605             __ Lsrs(o_h, high, 1);
5606             __ Rrx(o_l, low);
5607           }
5608         } else if (shift_value == 0) {
5609           __ Mov(o_l, low);
5610           __ Mov(o_h, high);
5611         } else {
5612           DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5613           if (op->IsShl()) {
5614             __ Lsl(o_h, high, shift_value);
5615             __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5616             __ Lsl(o_l, low, shift_value);
5617           } else if (op->IsShr()) {
5618             __ Lsr(o_l, low, shift_value);
5619             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5620             __ Asr(o_h, high, shift_value);
5621           } else {
5622             __ Lsr(o_l, low, shift_value);
5623             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5624             __ Lsr(o_h, high, shift_value);
5625           }
5626         }
5627       }
5628       break;
5629     }
5630     default:
5631       LOG(FATAL) << "Unexpected operation type " << type;
5632       UNREACHABLE();
5633   }
5634 }
5635 
5636 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5637   HandleShift(shl);
5638 }
5639 
5640 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5641   HandleShift(shl);
5642 }
5643 
5644 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5645   HandleShift(shr);
5646 }
5647 
5648 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5649   HandleShift(shr);
5650 }
5651 
5652 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5653   HandleShift(ushr);
5654 }
5655 
5656 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5657   HandleShift(ushr);
5658 }
5659 
5660 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5661   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5662       instruction, LocationSummary::kCallOnMainOnly);
5663   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5664   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5665   locations->SetOut(LocationFrom(r0));
5666 }
5667 
5668 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5669   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5670   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5671   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5672 }
5673 
5674 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5675   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5676       instruction, LocationSummary::kCallOnMainOnly);
5677   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5678   locations->SetOut(LocationFrom(r0));
5679   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5680   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5681 }
5682 
5683 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5684   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5685   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5686   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5687   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5688   DCHECK(!codegen_->IsLeafMethod());
5689   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5690 }
5691 
5692 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5693   LocationSummary* locations =
5694       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5695   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5696   if (location.IsStackSlot()) {
5697     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5698   } else if (location.IsDoubleStackSlot()) {
5699     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5700   }
5701   locations->SetOut(location);
5702 }
5703 
5704 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5705     [[maybe_unused]] HParameterValue* instruction) {
5706   // Nothing to do, the parameter is already at its location.
5707 }
5708 
5709 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5710   LocationSummary* locations =
5711       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5712   locations->SetOut(LocationFrom(kMethodRegister));
5713 }
5714 
5715 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5716     [[maybe_unused]] HCurrentMethod* instruction) {
5717   // Nothing to do, the method is already at its location.
5718 }
5719 
5720 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5721   LocationSummary* locations =
5722       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5723   locations->SetInAt(0, Location::RequiresRegister());
5724   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5725 }
5726 
5727 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5728   LocationSummary* locations = not_->GetLocations();
5729   Location out = locations->Out();
5730   Location in = locations->InAt(0);
5731   switch (not_->GetResultType()) {
5732     case DataType::Type::kInt32:
5733       __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5734       break;
5735 
5736     case DataType::Type::kInt64:
5737       __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5738       __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5739       break;
5740 
5741     default:
5742       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5743   }
5744 }
5745 
5746 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5747   LocationSummary* locations =
5748       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5749   locations->SetInAt(0, Location::RequiresRegister());
5750   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5751 }
5752 
5753 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5754   __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5755 }
5756 
5757 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5758   LocationSummary* locations =
5759       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5760   switch (compare->InputAt(0)->GetType()) {
5761     case DataType::Type::kBool:
5762     case DataType::Type::kUint8:
5763     case DataType::Type::kInt8:
5764     case DataType::Type::kUint16:
5765     case DataType::Type::kInt16:
5766     case DataType::Type::kInt32:
5767     case DataType::Type::kInt64: {
5768       locations->SetInAt(0, Location::RequiresRegister());
5769       locations->SetInAt(1, Location::RequiresRegister());
5770       // Output overlaps because it is written before doing the low comparison.
5771       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5772       break;
5773     }
5774     case DataType::Type::kFloat32:
5775     case DataType::Type::kFloat64: {
5776       locations->SetInAt(0, Location::RequiresFpuRegister());
5777       locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5778       locations->SetOut(Location::RequiresRegister());
5779       break;
5780     }
5781     default:
5782       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5783   }
5784 }
5785 
5786 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5787   LocationSummary* locations = compare->GetLocations();
5788   vixl32::Register out = OutputRegister(compare);
5789   Location left = locations->InAt(0);
5790   Location right = locations->InAt(1);
5791 
5792   vixl32::Label less, greater, done;
5793   vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5794   DataType::Type type = compare->InputAt(0)->GetType();
5795   vixl32::Condition less_cond = vixl32::Condition::None();
5796   switch (type) {
5797     case DataType::Type::kBool:
5798     case DataType::Type::kUint8:
5799     case DataType::Type::kInt8:
5800     case DataType::Type::kUint16:
5801     case DataType::Type::kInt16:
5802     case DataType::Type::kInt32: {
5803       // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5804       __ Mov(out, 0);
5805       __ Cmp(RegisterFrom(left), RegisterFrom(right));  // Signed compare.
5806       less_cond = lt;
5807       break;
5808     }
5809     case DataType::Type::kInt64: {
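      // Compare the high words first (signed); only if they are equal do the low words decide,
      // and those compare as unsigned because they only contribute magnitude.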
5810       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // Signed compare.
5811       __ B(lt, &less, /* is_far_target= */ false);
5812       __ B(gt, &greater, /* is_far_target= */ false);
5813       // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5814       __ Mov(out, 0);
5815       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
5816       less_cond = lo;
5817       break;
5818     }
5819     case DataType::Type::kFloat32:
5820     case DataType::Type::kFloat64: {
5821       __ Mov(out, 0);
5822       GenerateVcmp(compare, codegen_);
5823       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5824       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5825       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5826       break;
5827     }
5828     default:
5829       LOG(FATAL) << "Unexpected compare type " << type;
5830       UNREACHABLE();
5831   }
5832 
5833   __ B(eq, final_label, /* is_far_target= */ false);
5834   __ B(less_cond, &less, /* is_far_target= */ false);
5835 
5836   __ Bind(&greater);
5837   __ Mov(out, 1);
5838   __ B(final_label);
5839 
5840   __ Bind(&less);
5841   __ Mov(out, -1);
5842 
5843   if (done.IsReferenced()) {
5844     __ Bind(&done);
5845   }
5846 }
5847 
5848 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5849   LocationSummary* locations =
5850       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5851   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5852     locations->SetInAt(i, Location::Any());
5853   }
5854   locations->SetOut(Location::Any());
5855 }
5856 
5857 void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
5858   LOG(FATAL) << "Unreachable";
5859 }
5860 
5861 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5862   // TODO (ported from quick): revisit ARM barrier kinds.
5863   DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
5864   switch (kind) {
5865     case MemBarrierKind::kAnyStore:
5866     case MemBarrierKind::kLoadAny:
5867     case MemBarrierKind::kAnyAny: {
5868       flavor = DmbOptions::ISH;
5869       break;
5870     }
5871     case MemBarrierKind::kStoreStore: {
5872       flavor = DmbOptions::ISHST;
5873       break;
5874     }
5875     default:
5876       LOG(FATAL) << "Unexpected memory barrier " << kind;
5877   }
5878   __ Dmb(flavor);
5879 }
5880 
5881 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5882                                                              uint32_t offset,
5883                                                              vixl32::Register out_lo,
5884                                                              vixl32::Register out_hi) {
5885   UseScratchRegisterScope temps(GetVIXLAssembler());
5886   if (offset != 0) {
5887     vixl32::Register temp = temps.Acquire();
5888     __ Add(temp, addr, offset);
5889     addr = temp;
5890   }
5891   __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5892 }
5893 
5894 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5895                                                               uint32_t offset,
5896                                                               vixl32::Register value_lo,
5897                                                               vixl32::Register value_hi,
5898                                                               vixl32::Register temp1,
5899                                                               vixl32::Register temp2,
5900                                                               HInstruction* instruction) {
5901   UseScratchRegisterScope temps(GetVIXLAssembler());
5902   vixl32::Label fail;
5903   if (offset != 0) {
5904     vixl32::Register temp = temps.Acquire();
5905     __ Add(temp, addr, offset);
5906     addr = temp;
5907   }
5908   __ Bind(&fail);
5909   {
5910     // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5911     ExactAssemblyScope aas(GetVIXLAssembler(),
5912                            vixl32::kMaxInstructionSizeInBytes,
5913                            CodeBufferCheckScope::kMaximumSize);
5914     // We need a load followed by store. (The address used in a STREX instruction must
5915     // be the same as the address in the most recently executed LDREX instruction.)
5916     __ ldrexd(temp1, temp2, MemOperand(addr));
5917     codegen_->MaybeRecordImplicitNullCheck(instruction);
5918   }
5919   __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
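  // STREXD writes 0 to temp1 on success and 1 if the exclusive monitor was lost,
  // in which case the store is retried from the LDREXD above.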
5920   __ CompareAndBranchIfNonZero(temp1, &fail);
5921 }
5922 
5923 void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
5924                                              const FieldInfo& field_info,
5925                                              WriteBarrierKind write_barrier_kind) {
5926   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5927 
5928   LocationSummary* locations =
5929       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5930   locations->SetInAt(0, Location::RequiresRegister());
5931 
5932   DataType::Type field_type = field_info.GetFieldType();
5933   if (DataType::IsFloatingPointType(field_type)) {
5934     locations->SetInAt(1, Location::RequiresFpuRegister());
5935   } else {
5936     locations->SetInAt(1, Location::RequiresRegister());
5937   }
5938 
5939   bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5940   bool generate_volatile = field_info.IsVolatile()
5941       && is_wide
5942       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5943   bool needs_write_barrier =
5944       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5945   bool check_gc_card =
5946       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5947 
5948   // Temporary registers for the write barrier.
5949   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5950   if (needs_write_barrier || check_gc_card) {
5951     locations->AddTemp(Location::RequiresRegister());
5952     locations->AddTemp(Location::RequiresRegister());
5953   } else if (generate_volatile) {
5954     // The ARM encoding has some additional constraints for ldrexd/strexd:
5955     // - registers need to be consecutive
5956     // - the first register should be even but not R14.
5957     // We don't test for ARM yet, and the assertion makes sure that we
5958     // revisit this if we ever enable ARM encoding.
5959     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5960 
5961     locations->AddTemp(Location::RequiresRegister());
5962     locations->AddTemp(Location::RequiresRegister());
5963     if (field_type == DataType::Type::kFloat64) {
5964       // For doubles we need two more registers to copy the value.
5965       locations->AddTemp(LocationFrom(r2));
5966       locations->AddTemp(LocationFrom(r3));
5967     }
5968   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5969     locations->AddTemp(Location::RequiresRegister());
5970   }
5971 }
5972 
5973 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
5974                                                      const FieldInfo& field_info,
5975                                                      bool value_can_be_null,
5976                                                      WriteBarrierKind write_barrier_kind) {
5977   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5978 
5979   LocationSummary* locations = instruction->GetLocations();
5980   vixl32::Register base = InputRegisterAt(instruction, 0);
5981   Location value = locations->InAt(1);
5982 
5983   bool is_volatile = field_info.IsVolatile();
5984   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5985   DataType::Type field_type = field_info.GetFieldType();
5986   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5987   bool needs_write_barrier =
5988       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5989 
5990   if (is_volatile) {
5991     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5992   }
5993 
5994   switch (field_type) {
5995     case DataType::Type::kBool:
5996     case DataType::Type::kUint8:
5997     case DataType::Type::kInt8:
5998     case DataType::Type::kUint16:
5999     case DataType::Type::kInt16:
6000     case DataType::Type::kInt32: {
6001       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6002       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6003       StoreOperandType operand_type = GetStoreOperandType(field_type);
6004       GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
6005       codegen_->MaybeRecordImplicitNullCheck(instruction);
6006       break;
6007     }
6008 
6009     case DataType::Type::kReference: {
6010       vixl32::Register value_reg = RegisterFrom(value);
6011       if (kPoisonHeapReferences) {
6012         DCHECK_EQ(field_type, DataType::Type::kReference);
6013         value_reg = RegisterFrom(locations->GetTemp(0));
6014         __ Mov(value_reg, RegisterFrom(value));
6015         GetAssembler()->PoisonHeapReference(value_reg);
6016       }
6017       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6018       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6019       GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
6020       codegen_->MaybeRecordImplicitNullCheck(instruction);
6021       break;
6022     }
6023 
6024     case DataType::Type::kInt64: {
6025       if (is_volatile && !atomic_ldrd_strd) {
6026         GenerateWideAtomicStore(base,
6027                                 offset,
6028                                 LowRegisterFrom(value),
6029                                 HighRegisterFrom(value),
6030                                 RegisterFrom(locations->GetTemp(0)),
6031                                 RegisterFrom(locations->GetTemp(1)),
6032                                 instruction);
6033       } else {
6034         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6035         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6036         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
6037         codegen_->MaybeRecordImplicitNullCheck(instruction);
6038       }
6039       break;
6040     }
6041 
6042     case DataType::Type::kFloat32: {
6043       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6044       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6045       GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
6046       codegen_->MaybeRecordImplicitNullCheck(instruction);
6047       break;
6048     }
6049 
6050     case DataType::Type::kFloat64: {
6051       vixl32::DRegister value_reg = DRegisterFrom(value);
6052       if (is_volatile && !atomic_ldrd_strd) {
6053         vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
6054         vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
6055 
6056         __ Vmov(value_reg_lo, value_reg_hi, value_reg);
6057 
6058         GenerateWideAtomicStore(base,
6059                                 offset,
6060                                 value_reg_lo,
6061                                 value_reg_hi,
6062                                 RegisterFrom(locations->GetTemp(2)),
6063                                 RegisterFrom(locations->GetTemp(3)),
6064                                 instruction);
6065       } else {
6066         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6067         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6068         GetAssembler()->StoreDToOffset(value_reg, base, offset);
6069         codegen_->MaybeRecordImplicitNullCheck(instruction);
6070       }
6071       break;
6072     }
6073 
6074     case DataType::Type::kUint32:
6075     case DataType::Type::kUint64:
6076     case DataType::Type::kVoid:
6077       LOG(FATAL) << "Unreachable type " << field_type;
6078       UNREACHABLE();
6079   }
6080 
6081   if (needs_write_barrier) {
6082     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6083     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6084     codegen_->MaybeMarkGCCard(
6085         temp,
6086         card,
6087         base,
6088         RegisterFrom(value),
6089         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6090   } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6091     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
6092     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
6093     codegen_->CheckGCCardIsValid(temp, card, base);
6094   }
6095 
6096   if (is_volatile) {
6097     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6098   }
6099 }
6100 
6101 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
6102                                              const FieldInfo& field_info) {
6103   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6104 
6105   bool object_field_get_with_read_barrier =
6106       (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6107   LocationSummary* locations =
6108       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6109                                                        object_field_get_with_read_barrier
6110                                                            ? LocationSummary::kCallOnSlowPath
6111                                                            : LocationSummary::kNoCall);
6112   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6113     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6114   }
6115   // Input for object receiver.
6116   locations->SetInAt(0, Location::RequiresRegister());
6117 
6118   bool volatile_for_double = field_info.IsVolatile()
6119       && (field_info.GetFieldType() == DataType::Type::kFloat64)
6120       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6121   // The output overlaps in case of volatile long: we don't want the
6122   // code generated by GenerateWideAtomicLoad to overwrite the
6123   // object's location.  Likewise, in the case of an object field get
6124   // with read barriers enabled, we do not want the load to overwrite
6125   // the object's location, as we need it to emit the read barrier.
6126   bool overlap =
6127       (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
6128       object_field_get_with_read_barrier;
6129 
6130   if (DataType::IsFloatingPointType(instruction->GetType())) {
6131     locations->SetOut(Location::RequiresFpuRegister());
6132   } else {
6133     locations->SetOut(Location::RequiresRegister(),
6134                       (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
6135   }
6136   if (volatile_for_double) {
6137     // The ARM encoding has some additional constraints for ldrexd/strexd:
6138     // - registers need to be consecutive
6139     // - the first register should be even but not R14.
6140     // We don't test for ARM yet, and the assertion makes sure that we
6141     // revisit this if we ever enable ARM encoding.
6142     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
6143     locations->AddTemp(Location::RequiresRegister());
6144     locations->AddTemp(Location::RequiresRegister());
6145   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6146     // We need a temporary register for the read barrier load in
6147     // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6148     // only if the offset is too big.
6149     if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
6150       locations->AddTemp(Location::RequiresRegister());
6151     }
6152   }
6153 }
6154 
6155 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
6156   DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
6157   if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
6158       (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
6159     return Location::ConstantLocation(input);
6160   } else {
6161     return Location::RequiresFpuRegister();
6162   }
6163 }
6164 
6165 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
6166                                                                  Opcode opcode) {
6167   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
6168   if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
6169     return Location::ConstantLocation(constant);
6170   }
6171   return Location::RequiresRegister();
6172 }
6173 
6174 static bool CanEncode32BitConstantAsImmediate(
6175     CodeGeneratorARMVIXL* codegen,
6176     uint32_t value,
6177     Opcode opcode,
6178     vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
6179   ArmVIXLAssembler* assembler = codegen->GetAssembler();
6180   if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
6181     return true;
6182   }
6183   Opcode neg_opcode = kNoOperand;
6184   uint32_t neg_value = 0;
6185   switch (opcode) {
6186     case AND: neg_opcode = BIC; neg_value = ~value; break;
6187     case ORR: neg_opcode = ORN; neg_value = ~value; break;
6188     case ADD: neg_opcode = SUB; neg_value = -value; break;
6189     case ADC: neg_opcode = SBC; neg_value = ~value; break;
6190     case SUB: neg_opcode = ADD; neg_value = -value; break;
6191     case SBC: neg_opcode = ADC; neg_value = ~value; break;
6192     case MOV: neg_opcode = MVN; neg_value = ~value; break;
6193     default:
6194       return false;
6195   }
6196 
6197   if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
6198     return true;
6199   }
6200 
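  // An AND with a mask of the form 2^n - 1 (contiguous low bits) can be materialized
  // without an encodable immediate (e.g. as a bitfield extract), so accept it as well.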
6201   return opcode == AND && IsPowerOfTwo(value + 1);
6202 }
6203 
6204 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
6205   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
6206   if (DataType::Is64BitType(input_cst->GetType())) {
6207     Opcode high_opcode = opcode;
6208     vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
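    // A 64-bit ADD/SUB is lowered as ADDS on the low word followed by ADC on the high
    // word, so the low word must set the flags and the high word is checked against ADC.
    // If the low word of the constant is zero, a single operation on the high word
    // suffices (e.g. adding 0x0000000100000000 is one 32-bit ADD on the high word).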
6209     switch (opcode) {
6210       case SUB:
6211         // Flip the operation to an ADD.
6212         value = -value;
6213         opcode = ADD;
6214         FALLTHROUGH_INTENDED;
6215       case ADD:
6216         if (Low32Bits(value) == 0u) {
6217           return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
6218         }
6219         high_opcode = ADC;
6220         low_flags_update = vixl32::FlagsUpdate::SetFlags;
6221         break;
6222       default:
6223         break;
6224     }
6225     return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6226            CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6227   } else {
6228     return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6229   }
6230 }
6231 
6232 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6233                                                      const FieldInfo& field_info) {
6234   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6235 
6236   LocationSummary* locations = instruction->GetLocations();
6237   uint32_t receiver_input = 0;
6238   vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6239   Location out = locations->Out();
6240   bool is_volatile = field_info.IsVolatile();
6241   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6242   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6243   DataType::Type load_type = instruction->GetType();
6244   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6245 
6246   switch (load_type) {
6247     case DataType::Type::kBool:
6248     case DataType::Type::kUint8:
6249     case DataType::Type::kInt8:
6250     case DataType::Type::kUint16:
6251     case DataType::Type::kInt16:
6252     case DataType::Type::kInt32: {
6253       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6254       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6255       LoadOperandType operand_type = GetLoadOperandType(load_type);
6256       GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6257       codegen_->MaybeRecordImplicitNullCheck(instruction);
6258       break;
6259     }
6260 
6261     case DataType::Type::kReference: {
6262       // /* HeapReference<Object> */ out = *(base + offset)
6263       if (codegen_->EmitBakerReadBarrier()) {
6264         Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6265         // Note that a potential implicit null check is handled in this
6266         // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6267         codegen_->GenerateFieldLoadWithBakerReadBarrier(
6268             instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6269         if (is_volatile) {
6270           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6271         }
6272       } else {
6273         {
6274           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6275           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6276           GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6277           codegen_->MaybeRecordImplicitNullCheck(instruction);
6278         }
6279         if (is_volatile) {
6280           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6281         }
6282         // If read barriers are enabled, emit read barriers other than
6283         // Baker's using a slow path (and also unpoison the loaded
6284         // reference, if heap poisoning is enabled).
6285         codegen_->MaybeGenerateReadBarrierSlow(
6286             instruction, out, out, locations->InAt(receiver_input), offset);
6287       }
6288       break;
6289     }
6290 
6291     case DataType::Type::kInt64: {
6292       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6293       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
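      // Without single-copy atomic LDRD, a volatile 64-bit load is emitted as LDREXD,
      // which reads both words atomically (see GenerateWideAtomicLoad).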
6294       if (is_volatile && !atomic_ldrd_strd) {
6295         GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6296       } else {
6297         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6298       }
6299       codegen_->MaybeRecordImplicitNullCheck(instruction);
6300       break;
6301     }
6302 
6303     case DataType::Type::kFloat32: {
6304       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6305       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6306       GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6307       codegen_->MaybeRecordImplicitNullCheck(instruction);
6308       break;
6309     }
6310 
6311     case DataType::Type::kFloat64: {
6312       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6313       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6314       vixl32::DRegister out_dreg = DRegisterFrom(out);
6315       if (is_volatile && !atomic_ldrd_strd) {
6316         vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6317         vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6318         GenerateWideAtomicLoad(base, offset, lo, hi);
6319         codegen_->MaybeRecordImplicitNullCheck(instruction);
6320         __ Vmov(out_dreg, lo, hi);
6321       } else {
6322         GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6323         codegen_->MaybeRecordImplicitNullCheck(instruction);
6324       }
6325       break;
6326     }
6327 
6328     case DataType::Type::kUint32:
6329     case DataType::Type::kUint64:
6330     case DataType::Type::kVoid:
6331       LOG(FATAL) << "Unreachable type " << load_type;
6332       UNREACHABLE();
6333   }
6334 
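  // For volatile loads, the kLoadAny barrier below (DMB ISH) provides acquire semantics.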
6335   if (is_volatile) {
6336     if (load_type == DataType::Type::kReference) {
6337       // Memory barriers, in the case of references, are also handled
6338       // in the previous switch statement.
6339     } else {
6340       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6341     }
6342   }
6343 }
6344 
6345 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6346   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6347 }
6348 
6349 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6350   HandleFieldSet(instruction,
6351                  instruction->GetFieldInfo(),
6352                  instruction->GetValueCanBeNull(),
6353                  instruction->GetWriteBarrierKind());
6354 }
6355 
6356 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6357   HandleFieldGet(instruction, instruction->GetFieldInfo());
6358 }
6359 
6360 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6361   HandleFieldGet(instruction, instruction->GetFieldInfo());
6362 }
6363 
6364 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6365   HandleFieldGet(instruction, instruction->GetFieldInfo());
6366 }
6367 
6368 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6369   HandleFieldGet(instruction, instruction->GetFieldInfo());
6370 }
6371 
6372 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6373   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6374 }
6375 
6376 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6377   HandleFieldSet(instruction,
6378                  instruction->GetFieldInfo(),
6379                  instruction->GetValueCanBeNull(),
6380                  instruction->GetWriteBarrierKind());
6381 }
6382 
6383 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6384   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6385 }
6386 
6387 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
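  // The packed format word (encoding the number and types of the arguments) is passed in
  // r0, the first runtime-call argument register; the resulting String is also returned
  // in r0 (see the out location set up above).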
6388   __ Mov(r0, instruction->GetFormat()->GetValue());
6389   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6390 }
6391 
6392 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6393     HUnresolvedInstanceFieldGet* instruction) {
6394   FieldAccessCallingConventionARMVIXL calling_convention;
6395   codegen_->CreateUnresolvedFieldLocationSummary(
6396       instruction, instruction->GetFieldType(), calling_convention);
6397 }
6398 
6399 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6400     HUnresolvedInstanceFieldGet* instruction) {
6401   FieldAccessCallingConventionARMVIXL calling_convention;
6402   codegen_->GenerateUnresolvedFieldAccess(instruction,
6403                                           instruction->GetFieldType(),
6404                                           instruction->GetFieldIndex(),
6405                                           instruction->GetDexPc(),
6406                                           calling_convention);
6407 }
6408 
6409 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6410     HUnresolvedInstanceFieldSet* instruction) {
6411   FieldAccessCallingConventionARMVIXL calling_convention;
6412   codegen_->CreateUnresolvedFieldLocationSummary(
6413       instruction, instruction->GetFieldType(), calling_convention);
6414 }
6415 
6416 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6417     HUnresolvedInstanceFieldSet* instruction) {
6418   FieldAccessCallingConventionARMVIXL calling_convention;
6419   codegen_->GenerateUnresolvedFieldAccess(instruction,
6420                                           instruction->GetFieldType(),
6421                                           instruction->GetFieldIndex(),
6422                                           instruction->GetDexPc(),
6423                                           calling_convention);
6424 }
6425 
6426 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6427     HUnresolvedStaticFieldGet* instruction) {
6428   FieldAccessCallingConventionARMVIXL calling_convention;
6429   codegen_->CreateUnresolvedFieldLocationSummary(
6430       instruction, instruction->GetFieldType(), calling_convention);
6431 }
6432 
6433 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6434     HUnresolvedStaticFieldGet* instruction) {
6435   FieldAccessCallingConventionARMVIXL calling_convention;
6436   codegen_->GenerateUnresolvedFieldAccess(instruction,
6437                                           instruction->GetFieldType(),
6438                                           instruction->GetFieldIndex(),
6439                                           instruction->GetDexPc(),
6440                                           calling_convention);
6441 }
6442 
6443 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6444     HUnresolvedStaticFieldSet* instruction) {
6445   FieldAccessCallingConventionARMVIXL calling_convention;
6446   codegen_->CreateUnresolvedFieldLocationSummary(
6447       instruction, instruction->GetFieldType(), calling_convention);
6448 }
6449 
6450 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6451     HUnresolvedStaticFieldSet* instruction) {
6452   FieldAccessCallingConventionARMVIXL calling_convention;
6453   codegen_->GenerateUnresolvedFieldAccess(instruction,
6454                                           instruction->GetFieldType(),
6455                                           instruction->GetFieldIndex(),
6456                                           instruction->GetDexPc(),
6457                                           calling_convention);
6458 }
6459 
6460 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6461   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6462   locations->SetInAt(0, Location::RequiresRegister());
6463 }
6464 
6465 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6466   if (CanMoveNullCheckToUser(instruction)) {
6467     return;
6468   }
6469 
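  // An implicit null check is a plain load from the object's address: if the reference is
  // null, the load faults, and the fault handler uses the pc recorded below to locate this
  // instruction's stack map and throw the NullPointerException.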
6470   UseScratchRegisterScope temps(GetVIXLAssembler());
6471   // Ensure the pc position is recorded immediately after the `ldr` instruction.
6472   ExactAssemblyScope aas(GetVIXLAssembler(),
6473                          vixl32::kMaxInstructionSizeInBytes,
6474                          CodeBufferCheckScope::kMaximumSize);
6475   __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6476   RecordPcInfo(instruction, instruction->GetDexPc());
6477 }
6478 
6479 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6480   NullCheckSlowPathARMVIXL* slow_path =
6481       new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6482   AddSlowPath(slow_path);
6483   __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6484 }
6485 
6486 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6487   codegen_->GenerateNullCheck(instruction);
6488 }
6489 
6490 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6491                                                     Location out_loc,
6492                                                     vixl32::Register base,
6493                                                     vixl32::Register reg_index,
6494                                                     vixl32::Condition cond) {
6495   uint32_t shift_count = DataType::SizeShift(type);
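  // The address is base + (reg_index << shift_count), i.e. the scaled register offset
  // addressing mode [Rn, Rm, LSL #shift].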
6496   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6497 
6498   switch (type) {
6499     case DataType::Type::kBool:
6500     case DataType::Type::kUint8:
6501       __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6502       break;
6503     case DataType::Type::kInt8:
6504       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6505       break;
6506     case DataType::Type::kUint16:
6507       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6508       break;
6509     case DataType::Type::kInt16:
6510       __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6511       break;
6512     case DataType::Type::kReference:
6513     case DataType::Type::kInt32:
6514       __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6515       break;
6516     // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6517     case DataType::Type::kInt64:
6518     case DataType::Type::kFloat32:
6519     case DataType::Type::kFloat64:
6520     default:
6521       LOG(FATAL) << "Unreachable type " << type;
6522       UNREACHABLE();
6523   }
6524 }
6525 
6526 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6527                                                    Location loc,
6528                                                    vixl32::Register base,
6529                                                    vixl32::Register reg_index,
6530                                                    vixl32::Condition cond) {
6531   uint32_t shift_count = DataType::SizeShift(type);
6532   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6533 
6534   switch (type) {
6535     case DataType::Type::kBool:
6536     case DataType::Type::kUint8:
6537     case DataType::Type::kInt8:
6538       __ Strb(cond, RegisterFrom(loc), mem_address);
6539       break;
6540     case DataType::Type::kUint16:
6541     case DataType::Type::kInt16:
6542       __ Strh(cond, RegisterFrom(loc), mem_address);
6543       break;
6544     case DataType::Type::kReference:
6545     case DataType::Type::kInt32:
6546       __ Str(cond, RegisterFrom(loc), mem_address);
6547       break;
6548     // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6549     case DataType::Type::kInt64:
6550     case DataType::Type::kFloat32:
6551     case DataType::Type::kFloat64:
6552     default:
6553       LOG(FATAL) << "Unreachable type " << type;
6554       UNREACHABLE();
6555   }
6556 }
6557 
6558 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6559   bool object_array_get_with_read_barrier =
6560       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6561   LocationSummary* locations =
6562       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6563                                                        object_array_get_with_read_barrier
6564                                                            ? LocationSummary::kCallOnSlowPath
6565                                                            : LocationSummary::kNoCall);
6566   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6567     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6568   }
6569   locations->SetInAt(0, Location::RequiresRegister());
6570   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6571   if (DataType::IsFloatingPointType(instruction->GetType())) {
6572     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6573   } else {
6574     // The output overlaps in the case of an object array get with
6575     // read barriers enabled: we do not want the move to overwrite the
6576     // array's location, as we need it to emit the read barrier.
6577     locations->SetOut(
6578         Location::RequiresRegister(),
6579         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6580   }
6581   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6582     if (instruction->GetIndex()->IsConstant()) {
6583       // Array loads with constant index are treated as field loads.
6584       // We need a temporary register for the read barrier load in
6585       // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6586       // only if the offset is too big.
6587       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6588       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6589       offset += index << DataType::SizeShift(DataType::Type::kReference);
6590       if (offset >= kReferenceLoadMinFarOffset) {
6591         locations->AddTemp(Location::RequiresRegister());
6592       }
6593     } else {
6594       // We need a non-scratch temporary for the array data pointer in
6595       // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6596       locations->AddTemp(Location::RequiresRegister());
6597     }
6598   } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6599     // We also need a temporary register for the string compression feature.
6600     locations->AddTemp(Location::RequiresRegister());
6601   }
6602 }
6603 
6604 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6605   LocationSummary* locations = instruction->GetLocations();
6606   Location obj_loc = locations->InAt(0);
6607   vixl32::Register obj = InputRegisterAt(instruction, 0);
6608   Location index = locations->InAt(1);
6609   Location out_loc = locations->Out();
6610   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6611   DataType::Type type = instruction->GetType();
6612   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6613                                         instruction->IsStringCharAt();
6614   HInstruction* array_instr = instruction->GetArray();
6615   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6616 
6617   switch (type) {
6618     case DataType::Type::kBool:
6619     case DataType::Type::kUint8:
6620     case DataType::Type::kInt8:
6621     case DataType::Type::kUint16:
6622     case DataType::Type::kInt16:
6623     case DataType::Type::kInt32: {
6624       vixl32::Register length;
6625       if (maybe_compressed_char_at) {
6626         length = RegisterFrom(locations->GetTemp(0));
6627         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6628         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6629         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6630         GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6631         codegen_->MaybeRecordImplicitNullCheck(instruction);
6632       }
6633       if (index.IsConstant()) {
6634         int32_t const_index = Int32ConstantFrom(index);
6635         if (maybe_compressed_char_at) {
6636           vixl32::Label uncompressed_load, done;
6637           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
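          // The low bit of `count` is the compression flag (0 = compressed, 8-bit chars;
          // 1 = uncompressed, 16-bit chars). LSRS shifts it into the carry flag, so the
          // `cs` branch below takes the uncompressed path.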
6638           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6639           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6640                         "Expecting 0=compressed, 1=uncompressed");
6641           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6642           GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6643                                          RegisterFrom(out_loc),
6644                                          obj,
6645                                          data_offset + const_index);
6646           __ B(final_label);
6647           __ Bind(&uncompressed_load);
6648           GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6649                                          RegisterFrom(out_loc),
6650                                          obj,
6651                                          data_offset + (const_index << 1));
6652           if (done.IsReferenced()) {
6653             __ Bind(&done);
6654           }
6655         } else {
6656           uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6657 
6658           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6659           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6660           LoadOperandType load_type = GetLoadOperandType(type);
6661           GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6662           codegen_->MaybeRecordImplicitNullCheck(instruction);
6663         }
6664       } else {
6665         UseScratchRegisterScope temps(GetVIXLAssembler());
6666         vixl32::Register temp = temps.Acquire();
6667 
6668         if (has_intermediate_address) {
6669           // We do not need to compute the intermediate address from the array: the
6670           // input instruction has done it already. See the comment in
6671           // `TryExtractArrayAccessAddress()`.
6672           if (kIsDebugBuild) {
6673             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6674             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6675           }
6676           temp = obj;
6677         } else {
6678           __ Add(temp, obj, data_offset);
6679         }
6680         if (maybe_compressed_char_at) {
6681           vixl32::Label uncompressed_load, done;
6682           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6683           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6684           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6685                         "Expecting 0=compressed, 1=uncompressed");
6686           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6687           __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6688           __ B(final_label);
6689           __ Bind(&uncompressed_load);
6690           __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6691           if (done.IsReferenced()) {
6692             __ Bind(&done);
6693           }
6694         } else {
6695           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6696           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6697           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6698           codegen_->MaybeRecordImplicitNullCheck(instruction);
6699         }
6700       }
6701       break;
6702     }
6703 
6704     case DataType::Type::kReference: {
6705       // The read barrier instrumentation of object ArrayGet
6706       // instructions does not support the HIntermediateAddress
6707       // instruction.
6708       DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
6709 
6710       static_assert(
6711           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6712           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6713       // /* HeapReference<Object> */ out =
6714       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6715       if (codegen_->EmitBakerReadBarrier()) {
6716         // Note that a potential implicit null check is handled in this
6717         // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6718         DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6719         if (index.IsConstant()) {
6720           // Array load with a constant index can be treated as a field load.
6721           Location maybe_temp =
6722               (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6723           data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6724           codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6725                                                           out_loc,
6726                                                           obj,
6727                                                           data_offset,
6728                                                           maybe_temp,
6729                                                           /* needs_null_check= */ false);
6730         } else {
6731           Location temp = locations->GetTemp(0);
6732           codegen_->GenerateArrayLoadWithBakerReadBarrier(
6733               out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6734         }
6735       } else {
6736         vixl32::Register out = OutputRegister(instruction);
6737         if (index.IsConstant()) {
6738           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6739           {
6740             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6741             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6742             GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6743             codegen_->MaybeRecordImplicitNullCheck(instruction);
6744           }
6745           // If read barriers are enabled, emit read barriers other than
6746           // Baker's using a slow path (and also unpoison the loaded
6747           // reference, if heap poisoning is enabled).
6748           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6749         } else {
6750           UseScratchRegisterScope temps(GetVIXLAssembler());
6751           vixl32::Register temp = temps.Acquire();
6752 
6753           if (has_intermediate_address) {
6754             // We do not need to compute the intermediate address from the array: the
6755             // input instruction has done it already. See the comment in
6756             // `TryExtractArrayAccessAddress()`.
6757             if (kIsDebugBuild) {
6758               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6759               DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6760             }
6761             temp = obj;
6762           } else {
6763             __ Add(temp, obj, data_offset);
6764           }
6765           {
6766             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6767             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6768             codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6769             temps.Close();
6770             codegen_->MaybeRecordImplicitNullCheck(instruction);
6771           }
6772           // If read barriers are enabled, emit read barriers other than
6773           // Baker's using a slow path (and also unpoison the loaded
6774           // reference, if heap poisoning is enabled).
6775           codegen_->MaybeGenerateReadBarrierSlow(
6776               instruction, out_loc, out_loc, obj_loc, data_offset, index);
6777         }
6778       }
6779       break;
6780     }
6781 
6782     case DataType::Type::kInt64: {
6783       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6784       // As two macro instructions can be emitted the max size is doubled.
6785       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6786       if (index.IsConstant()) {
6787         size_t offset =
6788             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6789         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6790       } else {
6791         UseScratchRegisterScope temps(GetVIXLAssembler());
6792         vixl32::Register temp = temps.Acquire();
6793         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6794         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6795       }
6796       codegen_->MaybeRecordImplicitNullCheck(instruction);
6797       break;
6798     }
6799 
6800     case DataType::Type::kFloat32: {
6801       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6802       // As two macro instructions can be emitted the max size is doubled.
6803       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6804       vixl32::SRegister out = SRegisterFrom(out_loc);
6805       if (index.IsConstant()) {
6806         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6807         GetAssembler()->LoadSFromOffset(out, obj, offset);
6808       } else {
6809         UseScratchRegisterScope temps(GetVIXLAssembler());
6810         vixl32::Register temp = temps.Acquire();
6811         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6812         GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6813       }
6814       codegen_->MaybeRecordImplicitNullCheck(instruction);
6815       break;
6816     }
6817 
6818     case DataType::Type::kFloat64: {
6819       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6820       // As two macro instructions can be emitted the max size is doubled.
6821       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6822       if (index.IsConstant()) {
6823         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6824         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6825       } else {
6826         UseScratchRegisterScope temps(GetVIXLAssembler());
6827         vixl32::Register temp = temps.Acquire();
6828         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6829         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6830       }
6831       codegen_->MaybeRecordImplicitNullCheck(instruction);
6832       break;
6833     }
6834 
6835     case DataType::Type::kUint32:
6836     case DataType::Type::kUint64:
6837     case DataType::Type::kVoid:
6838       LOG(FATAL) << "Unreachable type " << type;
6839       UNREACHABLE();
6840   }
6841 }
6842 
6843 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6844   DataType::Type value_type = instruction->GetComponentType();
6845 
6846   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6847   bool needs_write_barrier =
6848       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6849   bool check_gc_card =
6850       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6851 
6852   bool needs_type_check = instruction->NeedsTypeCheck();
6853 
6854   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6855       instruction,
6856       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6857 
6858   locations->SetInAt(0, Location::RequiresRegister());
6859   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6860   if (DataType::IsFloatingPointType(value_type)) {
6861     locations->SetInAt(2, Location::RequiresFpuRegister());
6862   } else {
6863     locations->SetInAt(2, Location::RequiresRegister());
6864   }
6865   if (needs_write_barrier || check_gc_card || instruction->NeedsTypeCheck()) {
6866     // Temporary registers for type checking, write barrier, checking the dirty bit, or register
6867     // poisoning.
6868     locations->AddTemp(Location::RequiresRegister());
6869     locations->AddTemp(Location::RequiresRegister());
6870   } else if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
6871     locations->AddTemp(Location::RequiresRegister());
6872   }
6873 }
6874 
6875 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6876   LocationSummary* locations = instruction->GetLocations();
6877   vixl32::Register array = InputRegisterAt(instruction, 0);
6878   Location index = locations->InAt(1);
6879   DataType::Type value_type = instruction->GetComponentType();
6880   bool needs_type_check = instruction->NeedsTypeCheck();
6881   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6882   bool needs_write_barrier =
6883       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6884   uint32_t data_offset =
6885       mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6886   Location value_loc = locations->InAt(2);
6887   HInstruction* array_instr = instruction->GetArray();
6888   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6889 
6890   switch (value_type) {
6891     case DataType::Type::kBool:
6892     case DataType::Type::kUint8:
6893     case DataType::Type::kInt8:
6894     case DataType::Type::kUint16:
6895     case DataType::Type::kInt16:
6896     case DataType::Type::kInt32: {
6897       if (index.IsConstant()) {
6898         int32_t const_index = Int32ConstantFrom(index);
6899         uint32_t full_offset =
6900             data_offset + (const_index << DataType::SizeShift(value_type));
6901         StoreOperandType store_type = GetStoreOperandType(value_type);
6902         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6903         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6904         GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6905         codegen_->MaybeRecordImplicitNullCheck(instruction);
6906       } else {
6907         UseScratchRegisterScope temps(GetVIXLAssembler());
6908         vixl32::Register temp = temps.Acquire();
6909 
6910         if (has_intermediate_address) {
6911           // We do not need to compute the intermediate address from the array: the
6912           // input instruction has done it already. See the comment in
6913           // `TryExtractArrayAccessAddress()`.
6914           if (kIsDebugBuild) {
6915             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6916             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6917           }
6918           temp = array;
6919         } else {
6920           __ Add(temp, array, data_offset);
6921         }
6922         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6923         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6924         codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6925         codegen_->MaybeRecordImplicitNullCheck(instruction);
6926       }
6927       break;
6928     }
6929 
6930     case DataType::Type::kReference: {
6931       vixl32::Register value = RegisterFrom(value_loc);
6932       // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6933       // See the comment in instruction_simplifier_shared.cc.
6934       DCHECK(!has_intermediate_address);
6935 
6936       if (instruction->InputAt(2)->IsNullConstant()) {
6937         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6938         // As two macro instructions can be emitted the max size is doubled.
6939         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6940         // Just setting null.
6941         if (index.IsConstant()) {
6942           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6943           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6944         } else {
6945           DCHECK(index.IsRegister()) << index;
6946           UseScratchRegisterScope temps(GetVIXLAssembler());
6947           vixl32::Register temp = temps.Acquire();
6948           __ Add(temp, array, data_offset);
6949           codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6950         }
6951         codegen_->MaybeRecordImplicitNullCheck(instruction);
6952         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6953           // We need to set a write barrier here even though we are writing null, since this write
6954           // barrier is being relied on.
6955           DCHECK(needs_write_barrier);
6956           vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
6957           vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
6958           codegen_->MarkGCCard(temp1, temp2, array);
6959         }
6960         DCHECK(!needs_type_check);
6961         break;
6962       }
6963 
6964       const bool can_value_be_null = instruction->GetValueCanBeNull();
6965       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6966       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6967       // value is null for the type check).
6968       const bool skip_marking_gc_card =
6969           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6970       vixl32::Label do_store;
6971       vixl32::Label skip_writing_card;
6972       if (can_value_be_null) {
6973         if (skip_marking_gc_card) {
6974           __ CompareAndBranchIfZero(value, &skip_writing_card, /* is_far_target= */ false);
6975         } else {
6976           __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
6977         }
6978       }
6979 
6980       SlowPathCodeARMVIXL* slow_path = nullptr;
6981       if (needs_type_check) {
6982         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
6983         codegen_->AddSlowPath(slow_path);
6984 
6985         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6986         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6987         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6988 
6989         // Note that when read barriers are enabled, the type checks
6990         // are performed without read barriers.  This is fine, even in
6991         // the case where a class object is in the from-space after
6992         // the flip, as a comparison involving such a type would not
6993         // produce a false positive; it may of course produce a false
6994         // negative, in which case we would take the ArraySet slow
6995         // path.
6996 
6997         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
6998         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
6999 
7000         {
7001           // Ensure we record the pc position immediately after the `ldr` instruction.
7002           ExactAssemblyScope aas(GetVIXLAssembler(),
7003                                  vixl32::kMaxInstructionSizeInBytes,
7004                                  CodeBufferCheckScope::kMaximumSize);
7005           // /* HeapReference<Class> */ temp1 = array->klass_
7006           __ ldr(temp1, MemOperand(array, class_offset));
7007           codegen_->MaybeRecordImplicitNullCheck(instruction);
7008         }
7009         GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7010 
7011         // /* HeapReference<Class> */ temp1 = temp1->component_type_
7012         GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
7013         // /* HeapReference<Class> */ temp2 = value->klass_
7014         GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
7015         // If heap poisoning is enabled, no need to unpoison `temp1`
7016         // nor `temp2`, as we are comparing two poisoned references.
7017         __ Cmp(temp1, temp2);
7018 
7019         if (instruction->StaticTypeOfArrayIsObjectArray()) {
7020           vixl32::Label do_put;
7021           __ B(eq, &do_put, /* is_far_target= */ false);
7022           // If heap poisoning is enabled, the `temp1` reference has
7023           // not been unpoisoned yet; unpoison it now.
7024           GetAssembler()->MaybeUnpoisonHeapReference(temp1);
7025 
7026           // /* HeapReference<Class> */ temp1 = temp1->super_class_
7027           GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
7028           // If heap poisoning is enabled, no need to unpoison
7029           // `temp1`, as we are comparing against null below.
7030           __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
7031           __ Bind(&do_put);
7032         } else {
7033           __ B(ne, slow_path->GetEntryLabel());
7034         }
7035       }
7036 
7037       if (can_value_be_null && !skip_marking_gc_card) {
7038         DCHECK(do_store.IsReferenced());
7039         __ Bind(&do_store);
7040       }
7041 
7042       if (needs_write_barrier) {
7043         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7044         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7045         codegen_->MarkGCCard(temp1, temp2, array);
7046       } else if (codegen_->ShouldCheckGCCard(
7047                      value_type, instruction->GetValue(), write_barrier_kind)) {
7048         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7049         vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
7050         codegen_->CheckGCCardIsValid(temp1, temp2, array);
7051       }
7052 
7053       if (skip_marking_gc_card) {
7054         // Note that we don't check that the GC card is valid here, as it may legitimately be clean.
7055         DCHECK(skip_writing_card.IsReferenced());
7056         __ Bind(&skip_writing_card);
7057       }
7058 
7059       vixl32::Register source = value;
7060       if (kPoisonHeapReferences) {
7061         vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
7062         DCHECK_EQ(value_type, DataType::Type::kReference);
7063         __ Mov(temp1, value);
7064         GetAssembler()->PoisonHeapReference(temp1);
7065         source = temp1;
7066       }
7067 
7068       {
7069         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7070         // As two macro instructions can be emitted the max size is doubled.
7071         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7072         if (index.IsConstant()) {
7073           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7074           GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
7075         } else {
7076           DCHECK(index.IsRegister()) << index;
7077 
7078           UseScratchRegisterScope temps(GetVIXLAssembler());
7079           vixl32::Register temp = temps.Acquire();
7080           __ Add(temp, array, data_offset);
7081           codegen_->StoreToShiftedRegOffset(value_type,
7082                                             LocationFrom(source),
7083                                             temp,
7084                                             RegisterFrom(index));
7085         }
7086 
7087         if (can_value_be_null || !needs_type_check) {
7088           codegen_->MaybeRecordImplicitNullCheck(instruction);
7089         }
7090       }
7091 
7092       if (slow_path != nullptr) {
7093         __ Bind(slow_path->GetExitLabel());
7094       }
7095 
7096       break;
7097     }
7098 
7099     case DataType::Type::kInt64: {
7100       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7101       // As two macro instructions can be emitted the max size is doubled.
7102       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7103       Location value = locations->InAt(2);
7104       if (index.IsConstant()) {
7105         size_t offset =
7106             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7107         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
7108       } else {
7109         UseScratchRegisterScope temps(GetVIXLAssembler());
7110         vixl32::Register temp = temps.Acquire();
7111         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7112         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
7113       }
7114       codegen_->MaybeRecordImplicitNullCheck(instruction);
7115       break;
7116     }
7117 
7118     case DataType::Type::kFloat32: {
7119       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7120       // As two macro instructions can be emitted the max size is doubled.
7121       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7122       Location value = locations->InAt(2);
7123       DCHECK(value.IsFpuRegister());
7124       if (index.IsConstant()) {
7125         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
7126         GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
7127       } else {
7128         UseScratchRegisterScope temps(GetVIXLAssembler());
7129         vixl32::Register temp = temps.Acquire();
7130         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
7131         GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
7132       }
7133       codegen_->MaybeRecordImplicitNullCheck(instruction);
7134       break;
7135     }
7136 
7137     case DataType::Type::kFloat64: {
7138       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
7139       // As two macro instructions can be emitted the max size is doubled.
7140       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
7141       Location value = locations->InAt(2);
7142       DCHECK(value.IsFpuRegisterPair());
7143       if (index.IsConstant()) {
7144         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
7145         GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
7146       } else {
7147         UseScratchRegisterScope temps(GetVIXLAssembler());
7148         vixl32::Register temp = temps.Acquire();
7149         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
7150         GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
7151       }
7152       codegen_->MaybeRecordImplicitNullCheck(instruction);
7153       break;
7154     }
7155 
7156     case DataType::Type::kUint32:
7157     case DataType::Type::kUint64:
7158     case DataType::Type::kVoid:
7159       LOG(FATAL) << "Unreachable type " << value_type;
7160       UNREACHABLE();
7161   }
7162 }
7163 
7164 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7165   LocationSummary* locations =
7166       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7167   locations->SetInAt(0, Location::RequiresRegister());
7168   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7169 }
7170 
7171 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
7172   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
7173   vixl32::Register obj = InputRegisterAt(instruction, 0);
7174   vixl32::Register out = OutputRegister(instruction);
7175   {
7176     ExactAssemblyScope aas(GetVIXLAssembler(),
7177                            vixl32::kMaxInstructionSizeInBytes,
7178                            CodeBufferCheckScope::kMaximumSize);
7179     __ ldr(out, MemOperand(obj, offset));
7180     codegen_->MaybeRecordImplicitNullCheck(instruction);
7181   }
7182   // Mask out compression flag from String's array length.
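  // With string compression, String::count holds (length << 1) | compression_flag, so the
  // shift below recovers the character count.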
7183   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
7184     __ Lsr(out, out, 1u);
7185   }
7186 }
7187 
7188 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7189   LocationSummary* locations =
7190       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7191 
7192   locations->SetInAt(0, Location::RequiresRegister());
7193   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
7194   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7195 }
7196 
7197 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
7198   vixl32::Register out = OutputRegister(instruction);
7199   vixl32::Register first = InputRegisterAt(instruction, 0);
7200   Location second = instruction->GetLocations()->InAt(1);
7201 
7202   if (second.IsRegister()) {
7203     __ Add(out, first, RegisterFrom(second));
7204   } else {
7205     __ Add(out, first, Int32ConstantFrom(second));
7206   }
7207 }
7208 
7209 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
7210     HIntermediateAddressIndex* instruction) {
7211   LOG(FATAL) << "Unreachable " << instruction->GetId();
7212 }
7213 
7214 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
7215     HIntermediateAddressIndex* instruction) {
7216   LOG(FATAL) << "Unreachable " << instruction->GetId();
7217 }
7218 
7219 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7220   RegisterSet caller_saves = RegisterSet::Empty();
7221   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7222   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
7223   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
7224   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
7225 
7226   HInstruction* index = instruction->InputAt(0);
7227   HInstruction* length = instruction->InputAt(1);
7228   // If both index and length are constants, we can check the bounds statically. However, if at
7229   // least one of them is not encodable, ArmEncodableConstantOrRegister would create a
7230   // Location::RequiresRegister(), which we do not want in that case. Instead, we create constant
7231   // locations for both inputs.
7232   bool both_const = index->IsConstant() && length->IsConstant();
7233   locations->SetInAt(0, both_const
7234       ? Location::ConstantLocation(index)
7235       : ArmEncodableConstantOrRegister(index, CMP));
7236   locations->SetInAt(1, both_const
7237       ? Location::ConstantLocation(length)
7238       : ArmEncodableConstantOrRegister(length, CMP));
7239 }
7240 
7241 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
7242   LocationSummary* locations = instruction->GetLocations();
7243   Location index_loc = locations->InAt(0);
7244   Location length_loc = locations->InAt(1);
7245 
7246   if (length_loc.IsConstant()) {
7247     int32_t length = Int32ConstantFrom(length_loc);
7248     if (index_loc.IsConstant()) {
7249       // BCE will remove the bounds check if we are guaranteed to pass.
7250       int32_t index = Int32ConstantFrom(index_loc);
7251       if (index < 0 || index >= length) {
7252         SlowPathCodeARMVIXL* slow_path =
7253             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7254         codegen_->AddSlowPath(slow_path);
7255         __ B(slow_path->GetEntryLabel());
7256       } else {
7257         // Some optimization after BCE may have generated this, and we should not
7258         // generate a bounds check if it is a valid range.
7259       }
7260       return;
7261     }
7262 
7263     SlowPathCodeARMVIXL* slow_path =
7264         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
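    // The unsigned comparison below (`hs`) also catches negative indexes, which appear as
    // large unsigned values.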
7265     __ Cmp(RegisterFrom(index_loc), length);
7266     codegen_->AddSlowPath(slow_path);
7267     __ B(hs, slow_path->GetEntryLabel());
7268   } else {
7269     SlowPathCodeARMVIXL* slow_path =
7270         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
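    // Compare the length against the index operand; `ls` (length <= index, unsigned)
    // branches to the slow path and also catches a negative constant index.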
7271     __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7272     codegen_->AddSlowPath(slow_path);
7273     __ B(ls, slow_path->GetEntryLabel());
7274   }
7275 }
7276 
7277 void CodeGeneratorARMVIXL::MaybeMarkGCCard(vixl32::Register temp,
7278                                            vixl32::Register card,
7279                                            vixl32::Register object,
7280                                            vixl32::Register value,
7281                                            bool emit_null_check) {
7282   vixl32::Label is_null;
7283   if (emit_null_check) {
7284     __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
7285   }
7286   MarkGCCard(temp, card, object);
7287   if (emit_null_check) {
7288     __ Bind(&is_null);
7289   }
7290 }
7291 
7292 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7293                                       vixl32::Register card,
7294                                       vixl32::Register object) {
7295   // Load the address of the card table into `card`.
7296   GetAssembler()->LoadFromOffset(
7297       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7298   // Calculate the offset (in the card table) of the card corresponding to `object`.
7299   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7300   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7301   // `object`'s card.
7302   //
7303   // Register `card` contains the address of the card table. Note that the card
7304   // table's base is biased during its creation so that it always starts at an
7305   // address whose least-significant byte is equal to `kCardDirty` (see
7306   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7307   // below writes the `kCardDirty` (byte) value into the `object`'s card
7308   // (located at `card + object >> kCardShift`).
7309   //
7310   // This dual use of the value in register `card` (1. to calculate the location
7311   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7312   // (no need to explicitly load `kCardDirty` as an immediate value).
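  // Illustrative pseudocode for the store below (not emitted code):
  //   card_table[object >> kCardShift] = kCardDirty;
  // with `card` holding the biased table base (whose low byte equals kCardDirty) and `temp`
  // holding `object >> kCardShift`.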
7313   __ Strb(card, MemOperand(card, temp));
7314 }
7315 
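// Debug helper: verifies that the card for `object` is not clean while the GC is marking (see
// the assert comment below) and traps with BKPT when the invariant is violated; presumably used
// to validate cases where a write barrier was elided.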
7316 void CodeGeneratorARMVIXL::CheckGCCardIsValid(vixl32::Register temp,
7317                                               vixl32::Register card,
7318                                               vixl32::Register object) {
7319   vixl32::Label done;
7320   // Load the address of the card table into `card`.
7321   GetAssembler()->LoadFromOffset(
7322       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7323   // Calculate the offset (in the card table) of the card corresponding to `object`.
7324   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7325   // assert (!clean || !self->is_gc_marking)
7326   __ Ldrb(temp, MemOperand(card, temp));
7327   static_assert(gc::accounting::CardTable::kCardClean == 0);
7328   __ CompareAndBranchIfNonZero(temp, &done, /*is_far_target=*/false);
7329   __ CompareAndBranchIfZero(mr, &done, /*is_far_target=*/false);
7330   __ Bkpt(0);
7331   __ Bind(&done);
7332 }
7333 
7334 void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
7335   LOG(FATAL) << "Unreachable";
7336 }
7337 
7338 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7339   if (instruction->GetNext()->IsSuspendCheck() &&
7340       instruction->GetBlock()->GetLoopInformation() != nullptr) {
7341     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7342     // The back edge will generate the suspend check.
7343     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7344   }
7345 
7346   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7347 }
7348 
7349 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7350   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7351       instruction, LocationSummary::kCallOnSlowPath);
7352   locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7353 }
7354 
7355 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7356   HBasicBlock* block = instruction->GetBlock();
7357   if (block->GetLoopInformation() != nullptr) {
7358     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7359     // The back edge will generate the suspend check.
7360     return;
7361   }
7362   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7363     // The goto will generate the suspend check.
7364     return;
7365   }
7366   GenerateSuspendCheck(instruction, nullptr);
7367   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7368 }
7369 
7370 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7371                                                            HBasicBlock* successor) {
7372   SuspendCheckSlowPathARMVIXL* slow_path =
7373       down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7374   if (slow_path == nullptr) {
7375     slow_path =
7376         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7377     instruction->SetSlowPath(slow_path);
7378     codegen_->AddSlowPath(slow_path);
7379     if (successor != nullptr) {
7380       DCHECK(successor->IsLoopHeader());
7381     }
7382   } else {
7383     DCHECK_EQ(slow_path->GetSuccessor(), successor);
7384   }
7385 
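  // Two code shapes below: with no successor (an explicit HSuspendCheck), fall through when no
  // suspend or checkpoint request is pending and enter the slow path otherwise; with a successor
  // (a back edge), branch to the successor when nothing is pending, else take the slow path,
  // which resumes at the successor.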
7386   UseScratchRegisterScope temps(GetVIXLAssembler());
7387   vixl32::Register temp = temps.Acquire();
7388   GetAssembler()->LoadFromOffset(
7389       kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
7390   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
7391   if (successor == nullptr) {
7392     __ B(ne, slow_path->GetEntryLabel());
7393     __ Bind(slow_path->GetReturnLabel());
7394   } else {
7395     __ B(eq, codegen_->GetLabelOf(successor));
7396     __ B(slow_path->GetEntryLabel());
7397   }
7398 }
7399 
7400 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7401   return codegen_->GetAssembler();
7402 }
7403 
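// Emits one move of a parallel move, dispatching on the source/destination kinds (core or FP
// register, register pair, 32/64-bit stack slot, or constant). Memory-to-memory moves and
// constant-to-stack moves go through a scratch register.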
7404 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7405   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7406   MoveOperands* move = moves_[index];
7407   Location source = move->GetSource();
7408   Location destination = move->GetDestination();
7409 
7410   if (source.IsRegister()) {
7411     if (destination.IsRegister()) {
7412       __ Mov(RegisterFrom(destination), RegisterFrom(source));
7413     } else if (destination.IsFpuRegister()) {
7414       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7415     } else {
7416       DCHECK(destination.IsStackSlot());
7417       GetAssembler()->StoreToOffset(kStoreWord,
7418                                     RegisterFrom(source),
7419                                     sp,
7420                                     destination.GetStackIndex());
7421     }
7422   } else if (source.IsStackSlot()) {
7423     if (destination.IsRegister()) {
7424       GetAssembler()->LoadFromOffset(kLoadWord,
7425                                      RegisterFrom(destination),
7426                                      sp,
7427                                      source.GetStackIndex());
7428     } else if (destination.IsFpuRegister()) {
7429       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7430     } else {
7431       DCHECK(destination.IsStackSlot());
7432       vixl32::Register temp = temps.Acquire();
7433       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7434       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7435     }
7436   } else if (source.IsFpuRegister()) {
7437     if (destination.IsRegister()) {
7438       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7439     } else if (destination.IsFpuRegister()) {
7440       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7441     } else {
7442       DCHECK(destination.IsStackSlot());
7443       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7444     }
7445   } else if (source.IsDoubleStackSlot()) {
7446     if (destination.IsDoubleStackSlot()) {
7447       vixl32::DRegister temp = temps.AcquireD();
7448       GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7449       GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7450     } else if (destination.IsRegisterPair()) {
7451       DCHECK(ExpectedPairLayout(destination));
7452       GetAssembler()->LoadFromOffset(
7453           kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7454     } else {
7455       DCHECK(destination.IsFpuRegisterPair()) << destination;
7456       GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7457     }
7458   } else if (source.IsRegisterPair()) {
7459     if (destination.IsRegisterPair()) {
7460       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7461       __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7462     } else if (destination.IsFpuRegisterPair()) {
7463       __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7464     } else {
7465       DCHECK(destination.IsDoubleStackSlot()) << destination;
7466       DCHECK(ExpectedPairLayout(source));
7467       GetAssembler()->StoreToOffset(kStoreWordPair,
7468                                     LowRegisterFrom(source),
7469                                     sp,
7470                                     destination.GetStackIndex());
7471     }
7472   } else if (source.IsFpuRegisterPair()) {
7473     if (destination.IsRegisterPair()) {
7474       __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7475     } else if (destination.IsFpuRegisterPair()) {
7476       __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7477     } else {
7478       DCHECK(destination.IsDoubleStackSlot()) << destination;
7479       GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7480     }
7481   } else {
7482     DCHECK(source.IsConstant()) << source;
7483     HConstant* constant = source.GetConstant();
7484     if (constant->IsIntConstant() || constant->IsNullConstant()) {
7485       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7486       if (destination.IsRegister()) {
7487         __ Mov(RegisterFrom(destination), value);
7488       } else {
7489         DCHECK(destination.IsStackSlot());
7490         vixl32::Register temp = temps.Acquire();
7491         __ Mov(temp, value);
7492         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7493       }
7494     } else if (constant->IsLongConstant()) {
7495       int64_t value = Int64ConstantFrom(source);
7496       if (destination.IsRegisterPair()) {
7497         __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7498         __ Mov(HighRegisterFrom(destination), High32Bits(value));
7499       } else {
7500         DCHECK(destination.IsDoubleStackSlot()) << destination;
7501         vixl32::Register temp = temps.Acquire();
7502         __ Mov(temp, Low32Bits(value));
7503         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7504         __ Mov(temp, High32Bits(value));
7505         GetAssembler()->StoreToOffset(kStoreWord,
7506                                       temp,
7507                                       sp,
7508                                       destination.GetHighStackIndex(kArmWordSize));
7509       }
7510     } else if (constant->IsDoubleConstant()) {
7511       double value = constant->AsDoubleConstant()->GetValue();
7512       if (destination.IsFpuRegisterPair()) {
7513         __ Vmov(DRegisterFrom(destination), value);
7514       } else {
7515         DCHECK(destination.IsDoubleStackSlot()) << destination;
7516         uint64_t int_value = bit_cast<uint64_t, double>(value);
7517         vixl32::Register temp = temps.Acquire();
7518         __ Mov(temp, Low32Bits(int_value));
7519         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7520         __ Mov(temp, High32Bits(int_value));
7521         GetAssembler()->StoreToOffset(kStoreWord,
7522                                       temp,
7523                                       sp,
7524                                       destination.GetHighStackIndex(kArmWordSize));
7525       }
7526     } else {
7527       DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7528       float value = constant->AsFloatConstant()->GetValue();
7529       if (destination.IsFpuRegister()) {
7530         __ Vmov(SRegisterFrom(destination), value);
7531       } else {
7532         DCHECK(destination.IsStackSlot());
7533         vixl32::Register temp = temps.Acquire();
7534         __ Mov(temp, bit_cast<int32_t, float>(value));
7535         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7536       }
7537     }
7538   }
7539 }
7540 
7541 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7542   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7543   vixl32::Register temp = temps.Acquire();
7544   __ Mov(temp, reg);
7545   GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7546   GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7547 }
7548 
7549 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7550   // TODO(VIXL32): Double check the performance of this implementation.
7551   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7552   vixl32::Register temp1 = temps.Acquire();
7553   ScratchRegisterScope ensure_scratch(
7554       this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7555   vixl32::Register temp2(ensure_scratch.GetRegister());
7556 
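  // If the second scratch register had to be spilled, SpillScratch() pushed it onto the stack,
  // so all SP-relative offsets are off by one word until it is restored.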
7557   int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7558   GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7559   GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7560   GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7561   GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7562 }
7563 
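// Swaps the contents of two locations, using core or FP scratch registers as temporaries;
// register<->stack swaps are delegated to Exchange().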
7564 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7565   MoveOperands* move = moves_[index];
7566   Location source = move->GetSource();
7567   Location destination = move->GetDestination();
7568   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7569 
7570   if (source.IsRegister() && destination.IsRegister()) {
7571     vixl32::Register temp = temps.Acquire();
7572     DCHECK(!RegisterFrom(source).Is(temp));
7573     DCHECK(!RegisterFrom(destination).Is(temp));
7574     __ Mov(temp, RegisterFrom(destination));
7575     __ Mov(RegisterFrom(destination), RegisterFrom(source));
7576     __ Mov(RegisterFrom(source), temp);
7577   } else if (source.IsRegister() && destination.IsStackSlot()) {
7578     Exchange(RegisterFrom(source), destination.GetStackIndex());
7579   } else if (source.IsStackSlot() && destination.IsRegister()) {
7580     Exchange(RegisterFrom(destination), source.GetStackIndex());
7581   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7582     Exchange(source.GetStackIndex(), destination.GetStackIndex());
7583   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7584     vixl32::Register temp = temps.Acquire();
7585     __ Vmov(temp, SRegisterFrom(source));
7586     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7587     __ Vmov(SRegisterFrom(destination), temp);
7588   } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
7589     vixl32::DRegister temp = temps.AcquireD();
7590     __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7591     __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7592     __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7593     __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7594   } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7595     vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7596     int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7597     DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7598     vixl32::DRegister temp = temps.AcquireD();
7599     __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7600     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7601     GetAssembler()->StoreDToOffset(temp, sp, mem);
7602   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7603     vixl32::DRegister first = DRegisterFrom(source);
7604     vixl32::DRegister second = DRegisterFrom(destination);
7605     vixl32::DRegister temp = temps.AcquireD();
7606     __ Vmov(temp, first);
7607     __ Vmov(first, second);
7608     __ Vmov(second, temp);
7609   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7610     vixl32::DRegister reg = source.IsFpuRegisterPair()
7611         ? DRegisterFrom(source)
7612         : DRegisterFrom(destination);
7613     int mem = source.IsFpuRegisterPair()
7614         ? destination.GetStackIndex()
7615         : source.GetStackIndex();
7616     vixl32::DRegister temp = temps.AcquireD();
7617     __ Vmov(temp, reg);
7618     GetAssembler()->LoadDFromOffset(reg, sp, mem);
7619     GetAssembler()->StoreDToOffset(temp, sp, mem);
7620   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7621     vixl32::SRegister reg = source.IsFpuRegister()
7622         ? SRegisterFrom(source)
7623         : SRegisterFrom(destination);
7624     int mem = source.IsFpuRegister()
7625         ? destination.GetStackIndex()
7626         : source.GetStackIndex();
7627     vixl32::Register temp = temps.Acquire();
7628     __ Vmov(temp, reg);
7629     GetAssembler()->LoadSFromOffset(reg, sp, mem);
7630     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7631   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7632     vixl32::DRegister temp1 = temps.AcquireD();
7633     vixl32::DRegister temp2 = temps.AcquireD();
7634     __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7635     __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7636     __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7637     __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7638   } else {
7639     LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7640   }
7641 }
7642 
7643 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7644   __ Push(vixl32::Register(reg));
7645 }
7646 
7647 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7648   __ Pop(vixl32::Register(reg));
7649 }
7650 
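// All class load kinds are supported on ARM; the switch below only sanity-checks that the
// requested kind is consistent with JIT vs. AOT compilation and returns it unchanged.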
7651 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7652     HLoadClass::LoadKind desired_class_load_kind) {
7653   switch (desired_class_load_kind) {
7654     case HLoadClass::LoadKind::kInvalid:
7655       LOG(FATAL) << "UNREACHABLE";
7656       UNREACHABLE();
7657     case HLoadClass::LoadKind::kReferrersClass:
7658       break;
7659     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7660     case HLoadClass::LoadKind::kBootImageRelRo:
7661     case HLoadClass::LoadKind::kAppImageRelRo:
7662     case HLoadClass::LoadKind::kBssEntry:
7663     case HLoadClass::LoadKind::kBssEntryPublic:
7664     case HLoadClass::LoadKind::kBssEntryPackage:
7665       DCHECK(!GetCompilerOptions().IsJitCompiler());
7666       break;
7667     case HLoadClass::LoadKind::kJitBootImageAddress:
7668     case HLoadClass::LoadKind::kJitTableAddress:
7669       DCHECK(GetCompilerOptions().IsJitCompiler());
7670       break;
7671     case HLoadClass::LoadKind::kRuntimeCall:
7672       break;
7673   }
7674   return desired_class_load_kind;
7675 }
7676 
7677 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7678   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7679   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7680     InvokeRuntimeCallingConventionARMVIXL calling_convention;
7681     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7682         cls,
7683         LocationFrom(calling_convention.GetRegisterAt(0)),
7684         LocationFrom(r0));
7685     DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7686     return;
7687   }
7688   DCHECK_EQ(cls->NeedsAccessCheck(),
7689             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7690                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7691 
7692   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7693   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7694       ? LocationSummary::kCallOnSlowPath
7695       : LocationSummary::kNoCall;
7696   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7697   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7698     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7699   }
7700 
7701   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7702     locations->SetInAt(0, Location::RequiresRegister());
7703   }
7704   locations->SetOut(Location::RequiresRegister());
7705   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
7706       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7707       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
7708     if (codegen_->EmitNonBakerReadBarrier()) {
7709       // For non-Baker read barrier we have a temp-clobbering call.
7710     } else {
7711       // Rely on the type resolution or initialization and marking to save everything we need.
7712       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7713     }
7714   }
7715 }
7716 
7717 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7718 // move.
7719 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7720   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7721   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7722     codegen_->GenerateLoadClassRuntimeCall(cls);
7723     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7724     return;
7725   }
7726   DCHECK_EQ(cls->NeedsAccessCheck(),
7727             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7728                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7729 
7730   LocationSummary* locations = cls->GetLocations();
7731   Location out_loc = locations->Out();
7732   vixl32::Register out = OutputRegister(cls);
7733 
7734   const ReadBarrierOption read_barrier_option =
7735       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7736   bool generate_null_check = false;
7737   switch (load_kind) {
7738     case HLoadClass::LoadKind::kReferrersClass: {
7739       DCHECK(!cls->CanCallRuntime());
7740       DCHECK(!cls->MustGenerateClinitCheck());
7741       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7742       vixl32::Register current_method = InputRegisterAt(cls, 0);
7743       codegen_->GenerateGcRootFieldLoad(cls,
7744                                         out_loc,
7745                                         current_method,
7746                                         ArtMethod::DeclaringClassOffset().Int32Value(),
7747                                         read_barrier_option);
7748       break;
7749     }
7750     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7751       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7752              codegen_->GetCompilerOptions().IsBootImageExtension());
7753       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7754       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7755           codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7756       codegen_->EmitMovwMovtPlaceholder(labels, out);
7757       break;
7758     }
7759     case HLoadClass::LoadKind::kBootImageRelRo: {
7760       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7761       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
7762       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7763       break;
7764     }
7765     case HLoadClass::LoadKind::kAppImageRelRo: {
7766       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7767       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7768       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7769           codegen_->NewAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7770       codegen_->EmitMovwMovtPlaceholder(labels, out);
7771       __ Ldr(out, MemOperand(out, /*offset=*/ 0));
7772       break;
7773     }
7774     case HLoadClass::LoadKind::kBssEntry:
7775     case HLoadClass::LoadKind::kBssEntryPublic:
7776     case HLoadClass::LoadKind::kBssEntryPackage: {
7777       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7778       codegen_->EmitMovwMovtPlaceholder(labels, out);
7779       // All aligned loads are implicitly atomic consume operations on ARM.
7780       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7781       generate_null_check = true;
7782       break;
7783     }
7784     case HLoadClass::LoadKind::kJitBootImageAddress: {
7785       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7786       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7787       DCHECK_NE(address, 0u);
7788       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7789       break;
7790     }
7791     case HLoadClass::LoadKind::kJitTableAddress: {
7792       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7793                                                        cls->GetTypeIndex(),
7794                                                        cls->GetClass()));
7795       // /* GcRoot<mirror::Class> */ out = *out
7796       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /*offset=*/ 0, read_barrier_option);
7797       break;
7798     }
7799     case HLoadClass::LoadKind::kRuntimeCall:
7800     case HLoadClass::LoadKind::kInvalid:
7801       LOG(FATAL) << "UNREACHABLE";
7802       UNREACHABLE();
7803   }
7804 
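  // For the .bss entry kinds the type may not be resolved yet (the GC root load above then
  // yields null), and a class loaded for a static member access may still need initialization.
  // Both cases are handled by the shared LoadClassSlowPathARMVIXL below.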
7805   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7806     DCHECK(cls->CanCallRuntime());
7807     LoadClassSlowPathARMVIXL* slow_path =
7808         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7809     codegen_->AddSlowPath(slow_path);
7810     if (generate_null_check) {
7811       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7812     }
7813     if (cls->MustGenerateClinitCheck()) {
7814       GenerateClassInitializationCheck(slow_path, out);
7815     } else {
7816       __ Bind(slow_path->GetExitLabel());
7817     }
7818     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7819   }
7820 }
7821 
7822 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7823   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7824   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7825   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7826 }
7827 
7828 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7829   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7830 }
7831 
7832 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7833   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7834   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7835   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7836 }
7837 
7838 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7839   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7840 }
7841 
7842 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7843   LocationSummary* locations =
7844       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7845   locations->SetInAt(0, Location::RequiresRegister());
7846   if (check->HasUses()) {
7847     locations->SetOut(Location::SameAsFirstInput());
7848   }
7849   // Rely on the type initialization to save everything we need.
7850   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7851 }
7852 
7853 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7854   // We assume the class is not null.
7855   LoadClassSlowPathARMVIXL* slow_path =
7856       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7857   codegen_->AddSlowPath(slow_path);
7858   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7859 }
7860 
7861 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7862     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7863   UseScratchRegisterScope temps(GetVIXLAssembler());
7864   vixl32::Register temp = temps.Acquire();
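  // Load the byte of the 32-bit Class::status_ field that holds the class status and compare it
  // against the shifted kVisiblyInitialized value; any smaller value means initialization may
  // still be pending, so the slow path is taken.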
7865   __ Ldrb(temp, MemOperand(class_reg, kClassStatusByteOffset));
7866   __ Cmp(temp, kShiftedVisiblyInitializedValue);
7867   __ B(lo, slow_path->GetEntryLabel());
7868   __ Bind(slow_path->GetExitLabel());
7869 }
7870 
7871 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7872     HTypeCheckInstruction* check,
7873     vixl32::Register temp,
7874     vixl32::FlagsUpdate flags_update) {
7875   uint32_t path_to_root = check->GetBitstringPathToRoot();
7876   uint32_t mask = check->GetBitstringMask();
7877   DCHECK(IsPowerOfTwo(mask + 1));
7878   size_t mask_bits = WhichPowerOf2(mask + 1);
7879 
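  // The type-check bitstring occupies the `mask_bits` least significant bits of the 32-bit
  // status word; the check succeeds iff those bits equal `path_to_root`. The branches below
  // differ only in how cheaply that equality can be computed on Thumb-2.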
7880   // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7881   // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7882   if (mask_bits == 16u) {
7883     // Load only the bitstring part of the status word.
7884     __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7885     // Check if the bitstring bits are equal to `path_to_root`.
7886     if (flags_update == SetFlags) {
7887       __ Cmp(temp, path_to_root);
7888     } else {
7889       __ Sub(temp, temp, path_to_root);
7890     }
7891   } else {
7892     // /* uint32_t */ temp = temp->status_
7893     __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7894     if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7895       // Compare the bitstring bits using SUB.
7896       __ Sub(temp, temp, path_to_root);
7897       // Shift out bits that do not contribute to the comparison.
7898       __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7899     } else if (IsUint<16>(path_to_root)) {
7900       if (temp.IsLow()) {
7901         // Note: Optimized for size but contains one more dependent instruction than necessary.
7902         //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7903         //       macro assembler would use the high reg IP for the constant by default.
7904         // Compare the bitstring bits using SUB.
7905         __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
7906         __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
7907         // Shift out bits that do not contribute to the comparison.
7908         __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7909       } else {
7910         // Extract the bitstring bits.
7911         __ Ubfx(temp, temp, 0, mask_bits);
7912         // Check if the bitstring bits are equal to `path_to_root`.
7913         if (flags_update == SetFlags) {
7914           __ Cmp(temp, path_to_root);
7915         } else {
7916           __ Sub(temp, temp, path_to_root);
7917         }
7918       }
7919     } else {
7920       // Shift out bits that do not contribute to the comparison.
7921       __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7922       // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7923       if (flags_update == SetFlags) {
7924         __ Cmp(temp, path_to_root << (32u - mask_bits));
7925       } else {
7926         __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7927       }
7928     }
7929   }
7930 }
7931 
7932 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7933     HLoadString::LoadKind desired_string_load_kind) {
7934   switch (desired_string_load_kind) {
7935     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7936     case HLoadString::LoadKind::kBootImageRelRo:
7937     case HLoadString::LoadKind::kBssEntry:
7938       DCHECK(!GetCompilerOptions().IsJitCompiler());
7939       break;
7940     case HLoadString::LoadKind::kJitBootImageAddress:
7941     case HLoadString::LoadKind::kJitTableAddress:
7942       DCHECK(GetCompilerOptions().IsJitCompiler());
7943       break;
7944     case HLoadString::LoadKind::kRuntimeCall:
7945       break;
7946   }
7947   return desired_string_load_kind;
7948 }
7949 
7950 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7951   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7952   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7953   HLoadString::LoadKind load_kind = load->GetLoadKind();
7954   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7955     locations->SetOut(LocationFrom(r0));
7956   } else {
7957     locations->SetOut(Location::RequiresRegister());
7958     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7959       if (codegen_->EmitNonBakerReadBarrier()) {
7960         // For non-Baker read barrier we have a temp-clobbering call.
7961       } else {
7962         // Rely on the pResolveString and marking to save everything we need, including temps.
7963         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7964       }
7965     }
7966   }
7967 }
7968 
7969 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7970 // move.
7971 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7972   LocationSummary* locations = load->GetLocations();
7973   Location out_loc = locations->Out();
7974   vixl32::Register out = OutputRegister(load);
7975   HLoadString::LoadKind load_kind = load->GetLoadKind();
7976 
7977   switch (load_kind) {
7978     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7979       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7980              codegen_->GetCompilerOptions().IsBootImageExtension());
7981       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7982           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7983       codegen_->EmitMovwMovtPlaceholder(labels, out);
7984       return;
7985     }
7986     case HLoadString::LoadKind::kBootImageRelRo: {
7987       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7988       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
7989       codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
7990       return;
7991     }
7992     case HLoadString::LoadKind::kBssEntry: {
7993       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7994           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
7995       codegen_->EmitMovwMovtPlaceholder(labels, out);
7996       // All aligned loads are implicitly atomic consume operations on ARM.
7997       codegen_->GenerateGcRootFieldLoad(
7998           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
7999       LoadStringSlowPathARMVIXL* slow_path =
8000           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
8001       codegen_->AddSlowPath(slow_path);
8002       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
8003       __ Bind(slow_path->GetExitLabel());
8004       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
8005       return;
8006     }
8007     case HLoadString::LoadKind::kJitBootImageAddress: {
8008       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
8009       DCHECK_NE(address, 0u);
8010       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
8011       return;
8012     }
8013     case HLoadString::LoadKind::kJitTableAddress: {
8014       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
8015                                                         load->GetStringIndex(),
8016                                                         load->GetString()));
8017       // /* GcRoot<mirror::String> */ out = *out
8018       codegen_->GenerateGcRootFieldLoad(
8019           load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
8020       return;
8021     }
8022     default:
8023       break;
8024   }
8025 
8026   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
8027   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8028   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
8029   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
8030   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
8031   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
8032 }
8033 
8034 static int32_t GetExceptionTlsOffset() {
8035   return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
8036 }
8037 
8038 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
8039   LocationSummary* locations =
8040       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
8041   locations->SetOut(Location::RequiresRegister());
8042 }
8043 
8044 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
8045   vixl32::Register out = OutputRegister(load);
8046   GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
8047 }
8048 
8049 
8050 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
8051   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
8052 }
8053 
8054 void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
8055   UseScratchRegisterScope temps(GetVIXLAssembler());
8056   vixl32::Register temp = temps.Acquire();
8057   __ Mov(temp, 0);
8058   GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
8059 }
8060 
8061 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
8062   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8063       instruction, LocationSummary::kCallOnMainOnly);
8064   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8065   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8066 }
8067 
8068 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
8069   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
8070   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
8071 }
8072 
8073 // A temp is used for the read barrier; the interface check also needs one for the iftable.
8074 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8075   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8076     return 1;
8077   }
8078   if (emit_read_barrier &&
8079        (kUseBakerReadBarrier ||
8080           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
8081           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
8082           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
8083     return 1;
8084   }
8085   return 0;
8086 }
8087 
8088 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
8089 // interface pointer, one for loading the current interface.
8090 // The other checks have one temp for loading the object's class.
8091 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
8092   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8093     return 3;
8094   }
8095   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
8096 }
8097 
8098 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8099   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
8100   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8101   bool baker_read_barrier_slow_path = false;
8102   switch (type_check_kind) {
8103     case TypeCheckKind::kExactCheck:
8104     case TypeCheckKind::kAbstractClassCheck:
8105     case TypeCheckKind::kClassHierarchyCheck:
8106     case TypeCheckKind::kArrayObjectCheck:
8107     case TypeCheckKind::kInterfaceCheck: {
8108       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
8109       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
8110       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
8111                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
8112       break;
8113     }
8114     case TypeCheckKind::kArrayCheck:
8115     case TypeCheckKind::kUnresolvedCheck:
8116       call_kind = LocationSummary::kCallOnSlowPath;
8117       break;
8118     case TypeCheckKind::kBitstringCheck:
8119       break;
8120   }
8121 
8122   LocationSummary* locations =
8123       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8124   if (baker_read_barrier_slow_path) {
8125     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
8126   }
8127   locations->SetInAt(0, Location::RequiresRegister());
8128   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8129     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8130     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8131     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8132   } else {
8133     locations->SetInAt(1, Location::RequiresRegister());
8134   }
8135   // The "out" register is used as a temporary, so it overlaps with the inputs.
8136   // Note that TypeCheckSlowPathARM uses this register too.
8137   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
8138   locations->AddRegisterTemps(
8139       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
8140 }
8141 
8142 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
8143   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8144   LocationSummary* locations = instruction->GetLocations();
8145   Location obj_loc = locations->InAt(0);
8146   vixl32::Register obj = InputRegisterAt(instruction, 0);
8147   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8148       ? vixl32::Register()
8149       : InputRegisterAt(instruction, 1);
8150   Location out_loc = locations->Out();
8151   vixl32::Register out = OutputRegister(instruction);
8152   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
8153   DCHECK_LE(num_temps, 1u);
8154   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
8155   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8156   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8157   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8158   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8159   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8160   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8161   const uint32_t object_array_data_offset =
8162       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8163   vixl32::Label done;
8164   vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
8165   SlowPathCodeARMVIXL* slow_path = nullptr;
8166 
8167   // Return 0 if `obj` is null.
8168   // Avoid the null check if we know `obj` is not null.
8169   if (instruction->MustDoNullCheck()) {
8170     DCHECK(!out.Is(obj));
8171     __ Mov(out, 0);
8172     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8173   }
8174 
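  // In every case below `out` ends up as 1 on success and 0 on failure; several paths set the
  // "false" result speculatively (or reuse a null `out`) to avoid an extra branch.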
8175   switch (type_check_kind) {
8176     case TypeCheckKind::kExactCheck: {
8177       ReadBarrierOption read_barrier_option =
8178           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8179       // /* HeapReference<Class> */ out = obj->klass_
8180       GenerateReferenceLoadTwoRegisters(instruction,
8181                                         out_loc,
8182                                         obj_loc,
8183                                         class_offset,
8184                                         maybe_temp_loc,
8185                                         read_barrier_option);
8186       // Classes must be equal for the instanceof to succeed.
8187       __ Cmp(out, cls);
8188       // We speculatively set the result to false without changing the condition
8189       // flags, which allows us to avoid some branching later.
8190       __ Mov(LeaveFlags, out, 0);
8191 
8192       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8193       // we check that the output is in a low register, so that a 16-bit MOV
8194       // encoding can be used.
8195       if (out.IsLow()) {
8196         // We use the scope because of the IT block that follows.
8197         ExactAssemblyScope guard(GetVIXLAssembler(),
8198                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8199                                  CodeBufferCheckScope::kExactSize);
8200 
8201         __ it(eq);
8202         __ mov(eq, out, 1);
8203       } else {
8204         __ B(ne, final_label, /* is_far_target= */ false);
8205         __ Mov(out, 1);
8206       }
8207 
8208       break;
8209     }
8210 
8211     case TypeCheckKind::kAbstractClassCheck: {
8212       ReadBarrierOption read_barrier_option =
8213           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8214       // /* HeapReference<Class> */ out = obj->klass_
8215       GenerateReferenceLoadTwoRegisters(instruction,
8216                                         out_loc,
8217                                         obj_loc,
8218                                         class_offset,
8219                                         maybe_temp_loc,
8220                                         read_barrier_option);
8221       // If the class is abstract, we eagerly fetch the super class of the
8222       // object to avoid doing a comparison we know will fail.
8223       vixl32::Label loop;
8224       __ Bind(&loop);
8225       // /* HeapReference<Class> */ out = out->super_class_
8226       GenerateReferenceLoadOneRegister(instruction,
8227                                        out_loc,
8228                                        super_offset,
8229                                        maybe_temp_loc,
8230                                        read_barrier_option);
8231       // If `out` is null, we use it for the result, and jump to the final label.
8232       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8233       __ Cmp(out, cls);
8234       __ B(ne, &loop, /* is_far_target= */ false);
8235       __ Mov(out, 1);
8236       break;
8237     }
8238 
8239     case TypeCheckKind::kClassHierarchyCheck: {
8240       ReadBarrierOption read_barrier_option =
8241           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8242       // /* HeapReference<Class> */ out = obj->klass_
8243       GenerateReferenceLoadTwoRegisters(instruction,
8244                                         out_loc,
8245                                         obj_loc,
8246                                         class_offset,
8247                                         maybe_temp_loc,
8248                                         read_barrier_option);
8249       // Walk over the class hierarchy to find a match.
8250       vixl32::Label loop, success;
8251       __ Bind(&loop);
8252       __ Cmp(out, cls);
8253       __ B(eq, &success, /* is_far_target= */ false);
8254       // /* HeapReference<Class> */ out = out->super_class_
8255       GenerateReferenceLoadOneRegister(instruction,
8256                                        out_loc,
8257                                        super_offset,
8258                                        maybe_temp_loc,
8259                                        read_barrier_option);
8260       // This is essentially a null check, but it sets the condition flags to the
8261       // proper value for the code that follows the loop, i.e. not `eq`.
8262       __ Cmp(out, 1);
8263       __ B(hs, &loop, /* is_far_target= */ false);
8264 
8265       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8266       // we check that the output is in a low register, so that a 16-bit MOV
8267       // encoding can be used.
8268       if (out.IsLow()) {
8269         // If `out` is null, we use it for the result, and the condition flags
8270         // have already been set to `ne`, so the IT block that comes afterwards
8271         // (and which handles the successful case) turns into a NOP (instead of
8272         // overwriting `out`).
8273         __ Bind(&success);
8274 
8275         // We use the scope because of the IT block that follows.
8276         ExactAssemblyScope guard(GetVIXLAssembler(),
8277                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8278                                  CodeBufferCheckScope::kExactSize);
8279 
8280         // There is only one branch to the `success` label (which is bound to this
8281         // IT block), and it has the same condition, `eq`, so in that case the MOV
8282         // is executed.
8283         __ it(eq);
8284         __ mov(eq, out, 1);
8285       } else {
8286         // If `out` is null, we use it for the result, and jump to the final label.
8287         __ B(final_label);
8288         __ Bind(&success);
8289         __ Mov(out, 1);
8290       }
8291 
8292       break;
8293     }
8294 
8295     case TypeCheckKind::kArrayObjectCheck: {
8296       ReadBarrierOption read_barrier_option =
8297           codegen_->ReadBarrierOptionForInstanceOf(instruction);
8298       // /* HeapReference<Class> */ out = obj->klass_
8299       GenerateReferenceLoadTwoRegisters(instruction,
8300                                         out_loc,
8301                                         obj_loc,
8302                                         class_offset,
8303                                         maybe_temp_loc,
8304                                         read_barrier_option);
8305       // Do an exact check.
8306       vixl32::Label exact_check;
8307       __ Cmp(out, cls);
8308       __ B(eq, &exact_check, /* is_far_target= */ false);
8309       // Otherwise, we need to check that the object's class is a non-primitive array.
8310       // /* HeapReference<Class> */ out = out->component_type_
8311       GenerateReferenceLoadOneRegister(instruction,
8312                                        out_loc,
8313                                        component_offset,
8314                                        maybe_temp_loc,
8315                                        read_barrier_option);
8316       // If `out` is null, we use it for the result, and jump to the final label.
8317       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8318       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8319       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8320       __ Cmp(out, 0);
8321       // We speculatively set the result to false without changing the condition
8322       // flags, which allows us to avoid some branching later.
8323       __ Mov(LeaveFlags, out, 0);
8324 
8325       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8326       // we check that the output is in a low register, so that a 16-bit MOV
8327       // encoding can be used.
8328       if (out.IsLow()) {
8329         __ Bind(&exact_check);
8330 
8331         // We use the scope because of the IT block that follows.
8332         ExactAssemblyScope guard(GetVIXLAssembler(),
8333                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8334                                  CodeBufferCheckScope::kExactSize);
8335 
8336         __ it(eq);
8337         __ mov(eq, out, 1);
8338       } else {
8339         __ B(ne, final_label, /* is_far_target= */ false);
8340         __ Bind(&exact_check);
8341         __ Mov(out, 1);
8342       }
8343 
8344       break;
8345     }
8346 
8347     case TypeCheckKind::kArrayCheck: {
8348       // No read barrier since the slow path will retry upon failure.
8349       // /* HeapReference<Class> */ out = obj->klass_
8350       GenerateReferenceLoadTwoRegisters(instruction,
8351                                         out_loc,
8352                                         obj_loc,
8353                                         class_offset,
8354                                         maybe_temp_loc,
8355                                         kWithoutReadBarrier);
8356       __ Cmp(out, cls);
8357       DCHECK(locations->OnlyCallsOnSlowPath());
8358       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8359           instruction, /* is_fatal= */ false);
8360       codegen_->AddSlowPath(slow_path);
8361       __ B(ne, slow_path->GetEntryLabel());
8362       __ Mov(out, 1);
8363       break;
8364     }
8365 
8366     case TypeCheckKind::kInterfaceCheck: {
8367       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8368         DCHECK(locations->OnlyCallsOnSlowPath());
8369         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8370             instruction, /* is_fatal= */ false);
8371         codegen_->AddSlowPath(slow_path);
8372         if (codegen_->EmitNonBakerReadBarrier()) {
8373           __ B(slow_path->GetEntryLabel());
8374           break;
8375         }
8376         // For Baker read barrier, take the slow path while marking.
8377         __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
8378       }
8379 
8380       // Fast-path without read barriers.
8381       UseScratchRegisterScope temps(GetVIXLAssembler());
8382       vixl32::Register temp = RegisterFrom(maybe_temp_loc);
8383       vixl32::Register temp2 = temps.Acquire();
8384       // /* HeapReference<Class> */ temp = obj->klass_
8385       __ Ldr(temp, MemOperand(obj, class_offset));
8386       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8387       // /* HeapReference<Class> */ temp = temp->iftable_
8388       __ Ldr(temp, MemOperand(temp, iftable_offset));
8389       GetAssembler()->MaybeUnpoisonHeapReference(temp);
8390       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8391       __ Ldr(out, MemOperand(temp, array_length_offset));
8392       // Loop through the `IfTable` and check if any class matches.
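      // Note: `out` holds the raw length of the `IfTable` object array. Each interface
      // occupies two consecutive slots (the interface class and its method array),
      // which is why the loop advances by two references per iteration.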
8393       vixl32::Label loop;
8394       __ Bind(&loop);
8395       // If taken, the result in `out` is already 0 (false).
8396       __ CompareAndBranchIfZero(out, &done, /* is_far_target= */ false);
8397       __ Ldr(temp2, MemOperand(temp, object_array_data_offset));
8398       GetAssembler()->MaybeUnpoisonHeapReference(temp2);
8399       // Go to next interface.
8400       __ Add(temp, temp, static_cast<uint32_t>(2 * kHeapReferenceSize));
8401       __ Sub(out, out, 2);
8402       // Compare the classes and continue the loop if they do not match.
8403       __ Cmp(cls, temp2);
8404       __ B(ne, &loop);
8405       __ Mov(out, 1);
8406       break;
8407     }
8408 
8409     case TypeCheckKind::kUnresolvedCheck: {
8410       // Note that we indeed only call on slow path, but we always go
8411       // into the slow path for the unresolved check case.
8412       //
8413       // We cannot directly call the InstanceofNonTrivial runtime
8414       // entry point without resorting to a type checking slow path
8415       // here (i.e. by calling InvokeRuntime directly), as it would
8416       // require assigning fixed registers for the inputs of this
8417       // HInstanceOf instruction (following the runtime calling
8418       // convention), which might be cluttered by the potential first
8419       // read barrier emission at the beginning of this method.
8420       //
8421       // TODO: Introduce a new runtime entry point taking the object
8422       // to test (instead of its class) as argument, and let it deal
8423       // with the read barrier issues. This will let us refactor this
8424       // case of the `switch` code as it was previously (with a direct
8425       // call to the runtime not using a type checking slow path).
8426       // This should also be beneficial for the other cases above.
8427       DCHECK(locations->OnlyCallsOnSlowPath());
8428       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8429           instruction, /* is_fatal= */ false);
8430       codegen_->AddSlowPath(slow_path);
8431       __ B(slow_path->GetEntryLabel());
8432       break;
8433     }
8434 
8435     case TypeCheckKind::kBitstringCheck: {
8436       // /* HeapReference<Class> */ temp = obj->klass_
8437       GenerateReferenceLoadTwoRegisters(instruction,
8438                                         out_loc,
8439                                         obj_loc,
8440                                         class_offset,
8441                                         maybe_temp_loc,
8442                                         kWithoutReadBarrier);
8443 
8444       GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8445       // If `out` is a low reg and we would have another low reg temp, we could
8446       // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8447       //
8448       // Also, in some cases when `out` is a low reg and we're loading a constant to IP,
8449       // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8450       // would be the same and we would have fewer direct data dependencies.
8451       codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
8452       break;
8453     }
8454   }
8455 
8456   if (done.IsReferenced()) {
8457     __ Bind(&done);
8458   }
8459 
8460   if (slow_path != nullptr) {
8461     __ Bind(slow_path->GetExitLabel());
8462   }
8463 }
8464 
8465 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8466   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8467   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8468   LocationSummary* locations =
8469       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8470   locations->SetInAt(0, Location::RequiresRegister());
8471   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8472     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8473     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8474     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8475   } else {
8476     locations->SetInAt(1, Location::RequiresRegister());
8477   }
8478   locations->AddRegisterTemps(
8479       NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8480 }
8481 
8482 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8483   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8484   LocationSummary* locations = instruction->GetLocations();
8485   Location obj_loc = locations->InAt(0);
8486   vixl32::Register obj = InputRegisterAt(instruction, 0);
8487   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8488       ? vixl32::Register()
8489       : InputRegisterAt(instruction, 1);
8490   Location temp_loc = locations->GetTemp(0);
8491   vixl32::Register temp = RegisterFrom(temp_loc);
8492   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8493   DCHECK_LE(num_temps, 3u);
8494   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8495   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8496   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8497   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8498   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8499   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8500   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8501   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8502   const uint32_t object_array_data_offset =
8503       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8504 
8505   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8506   SlowPathCodeARMVIXL* type_check_slow_path =
8507       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8508           instruction, is_type_check_slow_path_fatal);
8509   codegen_->AddSlowPath(type_check_slow_path);
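  // A fatal type check slow path never returns (it throws), so it does not need to
  // save and restore live registers; this mirrors the call kind chosen via
  // GetCheckCastCallKind() in the locations builder.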
8510 
8511   vixl32::Label done;
8512   vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8513   // Avoid null check if we know obj is not null.
8514   if (instruction->MustDoNullCheck()) {
8515     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8516   }
8517 
8518   switch (type_check_kind) {
8519     case TypeCheckKind::kExactCheck:
8520     case TypeCheckKind::kArrayCheck: {
8521       // /* HeapReference<Class> */ temp = obj->klass_
8522       GenerateReferenceLoadTwoRegisters(instruction,
8523                                         temp_loc,
8524                                         obj_loc,
8525                                         class_offset,
8526                                         maybe_temp2_loc,
8527                                         kWithoutReadBarrier);
8528 
8529       __ Cmp(temp, cls);
8530       // Jump to slow path for throwing the exception or doing a
8531       // more involved array check.
8532       __ B(ne, type_check_slow_path->GetEntryLabel());
8533       break;
8534     }
8535 
8536     case TypeCheckKind::kAbstractClassCheck: {
8537       // /* HeapReference<Class> */ temp = obj->klass_
8538       GenerateReferenceLoadTwoRegisters(instruction,
8539                                         temp_loc,
8540                                         obj_loc,
8541                                         class_offset,
8542                                         maybe_temp2_loc,
8543                                         kWithoutReadBarrier);
8544 
8545       // If the class is abstract, we eagerly fetch the super class of the
8546       // object to avoid doing a comparison we know will fail.
8547       vixl32::Label loop;
8548       __ Bind(&loop);
8549       // /* HeapReference<Class> */ temp = temp->super_class_
8550       GenerateReferenceLoadOneRegister(instruction,
8551                                        temp_loc,
8552                                        super_offset,
8553                                        maybe_temp2_loc,
8554                                        kWithoutReadBarrier);
8555 
8556       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8557       // exception.
8558       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8559 
8560       // Otherwise, compare the classes.
8561       __ Cmp(temp, cls);
8562       __ B(ne, &loop, /* is_far_target= */ false);
8563       break;
8564     }
8565 
8566     case TypeCheckKind::kClassHierarchyCheck: {
8567       // /* HeapReference<Class> */ temp = obj->klass_
8568       GenerateReferenceLoadTwoRegisters(instruction,
8569                                         temp_loc,
8570                                         obj_loc,
8571                                         class_offset,
8572                                         maybe_temp2_loc,
8573                                         kWithoutReadBarrier);
8574 
8575       // Walk over the class hierarchy to find a match.
8576       vixl32::Label loop;
8577       __ Bind(&loop);
8578       __ Cmp(temp, cls);
8579       __ B(eq, final_label, /* is_far_target= */ false);
8580 
8581       // /* HeapReference<Class> */ temp = temp->super_class_
8582       GenerateReferenceLoadOneRegister(instruction,
8583                                        temp_loc,
8584                                        super_offset,
8585                                        maybe_temp2_loc,
8586                                        kWithoutReadBarrier);
8587 
8588       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8589       // exception.
8590       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8591       // Otherwise, jump to the beginning of the loop.
8592       __ B(&loop);
8593       break;
8594     }
8595 
8596     case TypeCheckKind::kArrayObjectCheck:  {
8597       // /* HeapReference<Class> */ temp = obj->klass_
8598       GenerateReferenceLoadTwoRegisters(instruction,
8599                                         temp_loc,
8600                                         obj_loc,
8601                                         class_offset,
8602                                         maybe_temp2_loc,
8603                                         kWithoutReadBarrier);
8604 
8605       // Do an exact check.
8606       __ Cmp(temp, cls);
8607       __ B(eq, final_label, /* is_far_target= */ false);
8608 
8609       // Otherwise, we need to check that the object's class is a non-primitive array.
8610       // /* HeapReference<Class> */ temp = temp->component_type_
8611       GenerateReferenceLoadOneRegister(instruction,
8612                                        temp_loc,
8613                                        component_offset,
8614                                        maybe_temp2_loc,
8615                                        kWithoutReadBarrier);
8616       // If the component type is null, jump to the slow path to throw the exception.
8617       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8618       // Otherwise, the object is indeed an array; check further that its
8619       // component type is not a primitive type.
8620       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8621       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8622       __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8623       break;
8624     }
8625 
8626     case TypeCheckKind::kUnresolvedCheck:
8627       // We always go into the type check slow path for the unresolved check case.
8628       // We cannot directly call the CheckCast runtime entry point
8629       // without resorting to a type checking slow path here (i.e. by
8630       // calling InvokeRuntime directly), as it would require
8631       // assigning fixed registers for the inputs of this HCheckCast
8632       // instruction (following the runtime calling convention), which
8633       // might be cluttered by the potential first read barrier
8634       // emission at the beginning of this method.
8635 
8636       __ B(type_check_slow_path->GetEntryLabel());
8637       break;
8638 
8639     case TypeCheckKind::kInterfaceCheck: {
8640       // Avoid read barriers to improve performance of the fast path. We cannot get false
8641       // positives by doing this.
8642       // /* HeapReference<Class> */ temp = obj->klass_
8643       GenerateReferenceLoadTwoRegisters(instruction,
8644                                         temp_loc,
8645                                         obj_loc,
8646                                         class_offset,
8647                                         maybe_temp2_loc,
8648                                         kWithoutReadBarrier);
8649 
8650       // /* HeapReference<Class> */ temp = temp->iftable_
8651       GenerateReferenceLoadOneRegister(instruction,
8652                                        temp_loc,
8653                                        iftable_offset,
8654                                        maybe_temp2_loc,
8655                                        kWithoutReadBarrier);
8656       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8657       __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8658       // Loop through the iftable and check if any class matches.
8659       vixl32::Label start_loop;
8660       __ Bind(&start_loop);
8661       __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8662                                 type_check_slow_path->GetEntryLabel());
8663       __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8664       GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8665       // Go to next interface.
8666       __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8667       __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8668       // Compare the classes and continue the loop if they do not match.
8669       __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8670       __ B(ne, &start_loop, /* is_far_target= */ false);
8671       break;
8672     }
8673 
8674     case TypeCheckKind::kBitstringCheck: {
8675       // /* HeapReference<Class> */ temp = obj->klass_
8676       GenerateReferenceLoadTwoRegisters(instruction,
8677                                         temp_loc,
8678                                         obj_loc,
8679                                         class_offset,
8680                                         maybe_temp2_loc,
8681                                         kWithoutReadBarrier);
8682 
8683       GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8684       __ B(ne, type_check_slow_path->GetEntryLabel());
8685       break;
8686     }
8687   }
8688   if (done.IsReferenced()) {
8689     __ Bind(&done);
8690   }
8691 
8692   __ Bind(type_check_slow_path->GetExitLabel());
8693 }
8694 
8695 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8696   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8697       instruction, LocationSummary::kCallOnMainOnly);
8698   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8699   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8700 }
8701 
8702 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8703   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8704                           instruction,
8705                           instruction->GetDexPc());
8706   if (instruction->IsEnter()) {
8707     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8708   } else {
8709     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8710   }
8711   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8712 }
8713 
8714 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8715   HandleBitwiseOperation(instruction, AND);
8716 }
8717 
8718 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8719   HandleBitwiseOperation(instruction, ORR);
8720 }
8721 
8722 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8723   HandleBitwiseOperation(instruction, EOR);
8724 }
8725 
8726 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8727   LocationSummary* locations =
8728       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8729   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8730          || instruction->GetResultType() == DataType::Type::kInt64);
8731   // Note: GVN reorders commutative operations to have the constant on the right hand side.
8732   locations->SetInAt(0, Location::RequiresRegister());
8733   locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8734   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8735 }
8736 
8737 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8738   HandleBitwiseOperation(instruction);
8739 }
8740 
8741 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8742   HandleBitwiseOperation(instruction);
8743 }
8744 
8745 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8746   HandleBitwiseOperation(instruction);
8747 }
8748 
8749 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8750   LocationSummary* locations =
8751       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8752   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8753          || instruction->GetResultType() == DataType::Type::kInt64);
8754 
8755   locations->SetInAt(0, Location::RequiresRegister());
8756   locations->SetInAt(1, Location::RequiresRegister());
8757   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8758 }
8759 
8760 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8761   LocationSummary* locations = instruction->GetLocations();
8762   Location first = locations->InAt(0);
8763   Location second = locations->InAt(1);
8764   Location out = locations->Out();
8765 
8766   if (instruction->GetResultType() == DataType::Type::kInt32) {
8767     vixl32::Register first_reg = RegisterFrom(first);
8768     vixl32::Register second_reg = RegisterFrom(second);
8769     vixl32::Register out_reg = RegisterFrom(out);
8770 
8771     switch (instruction->GetOpKind()) {
8772       case HInstruction::kAnd:
8773         __ Bic(out_reg, first_reg, second_reg);
8774         break;
8775       case HInstruction::kOr:
8776         __ Orn(out_reg, first_reg, second_reg);
8777         break;
8778       // There is no EON instruction on ARM.
8779       case HInstruction::kXor:
8780       default:
8781         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8782         UNREACHABLE();
8783     }
8784     return;
8785 
8786   } else {
8787     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8788     vixl32::Register first_low = LowRegisterFrom(first);
8789     vixl32::Register first_high = HighRegisterFrom(first);
8790     vixl32::Register second_low = LowRegisterFrom(second);
8791     vixl32::Register second_high = HighRegisterFrom(second);
8792     vixl32::Register out_low = LowRegisterFrom(out);
8793     vixl32::Register out_high = HighRegisterFrom(out);
8794 
8795     switch (instruction->GetOpKind()) {
8796       case HInstruction::kAnd:
8797         __ Bic(out_low, first_low, second_low);
8798         __ Bic(out_high, first_high, second_high);
8799         break;
8800       case HInstruction::kOr:
8801         __ Orn(out_low, first_low, second_low);
8802         __ Orn(out_high, first_high, second_high);
8803         break;
8804       // There is no EON instruction on ARM.
8805       case HInstruction::kXor:
8806       default:
8807         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8808         UNREACHABLE();
8809     }
8810   }
8811 }
8812 
8813 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8814     HDataProcWithShifterOp* instruction) {
8815   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8816          instruction->GetType() == DataType::Type::kInt64);
8817   LocationSummary* locations =
8818       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8819   const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8820                        HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
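  // For a 64-bit result of an extension op, both halves are computed from the same
  // 32-bit input (the high half via an arithmetic shift by 31), so the output must
  // not be allocated on top of that input; hence the overlap requirement above.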
8821 
8822   locations->SetInAt(0, Location::RequiresRegister());
8823   locations->SetInAt(1, Location::RequiresRegister());
8824   locations->SetOut(Location::RequiresRegister(),
8825                     overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8826 }
8827 
8828 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8829     HDataProcWithShifterOp* instruction) {
8830   const LocationSummary* const locations = instruction->GetLocations();
8831   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8832   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8833 
8834   if (instruction->GetType() == DataType::Type::kInt32) {
8835     const vixl32::Register first = InputRegisterAt(instruction, 0);
8836     const vixl32::Register output = OutputRegister(instruction);
8837     const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8838         ? LowRegisterFrom(locations->InAt(1))
8839         : InputRegisterAt(instruction, 1);
8840 
8841     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8842       DCHECK_EQ(kind, HInstruction::kAdd);
8843 
8844       switch (op_kind) {
8845         case HDataProcWithShifterOp::kUXTB:
8846           __ Uxtab(output, first, second);
8847           break;
8848         case HDataProcWithShifterOp::kUXTH:
8849           __ Uxtah(output, first, second);
8850           break;
8851         case HDataProcWithShifterOp::kSXTB:
8852           __ Sxtab(output, first, second);
8853           break;
8854         case HDataProcWithShifterOp::kSXTH:
8855           __ Sxtah(output, first, second);
8856           break;
8857         default:
8858           LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8859           UNREACHABLE();
8860       }
8861     } else {
8862       GenerateDataProcInstruction(kind,
8863                                   output,
8864                                   first,
8865                                   Operand(second,
8866                                           ShiftFromOpKind(op_kind),
8867                                           instruction->GetShiftAmount()),
8868                                   codegen_);
8869     }
8870   } else {
8871     DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8872 
8873     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8874       const vixl32::Register second = InputRegisterAt(instruction, 1);
8875 
8876       DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8877       GenerateDataProc(kind,
8878                        locations->Out(),
8879                        locations->InAt(0),
8880                        second,
8881                        Operand(second, ShiftType::ASR, 31),
8882                        codegen_);
8883     } else {
8884       GenerateLongDataProc(instruction, codegen_);
8885     }
8886   }
8887 }
8888 
8889 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8890 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8891                                                        vixl32::Register first,
8892                                                        uint32_t value) {
8893   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8894   if (value == 0xffffffffu) {
8895     if (!out.Is(first)) {
8896       __ Mov(out, first);
8897     }
8898     return;
8899   }
8900   if (value == 0u) {
8901     __ Mov(out, 0);
8902     return;
8903   }
8904   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8905     __ And(out, first, value);
8906   } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8907     __ Bic(out, first, ~value);
8908   } else {
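    // The only remaining case (checked below) is a contiguous low-bit mask,
    // i.e. value == 2^n - 1 (e.g. 0x00ffffff), which UBFX extracts as the low n bits.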
8909     DCHECK(IsPowerOfTwo(value + 1));
8910     __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8911   }
8912 }
8913 
8914 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8915 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8916                                                        vixl32::Register first,
8917                                                        uint32_t value) {
8918   // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8919   if (value == 0u) {
8920     if (!out.Is(first)) {
8921       __ Mov(out, first);
8922     }
8923     return;
8924   }
8925   if (value == 0xffffffffu) {
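    // ORR with all ones yields all ones; MVN with #0 materializes 0xffffffff directly.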
8926     __ Mvn(out, 0);
8927     return;
8928   }
8929   if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8930     __ Orr(out, first, value);
8931   } else {
8932     DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8933     __ Orn(out, first, ~value);
8934   }
8935 }
8936 
8937 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8938 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8939                                                        vixl32::Register first,
8940                                                        uint32_t value) {
8941   // Optimize the special case for individual halves of `xor-long` (`xor` is simplified earlier).
8942   if (value == 0u) {
8943     if (!out.Is(first)) {
8944       __ Mov(out, first);
8945     }
8946     return;
8947   }
8948   __ Eor(out, first, value);
8949 }
8950 
8951 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8952                                                            Location first,
8953                                                            uint64_t value) {
8954   vixl32::Register out_low = LowRegisterFrom(out);
8955   vixl32::Register out_high = HighRegisterFrom(out);
8956   vixl32::Register first_low = LowRegisterFrom(first);
8957   vixl32::Register first_high = HighRegisterFrom(first);
8958   uint32_t value_low = Low32Bits(value);
8959   uint32_t value_high = High32Bits(value);
8960   if (value_low == 0u) {
8961     if (!out_low.Is(first_low)) {
8962       __ Mov(out_low, first_low);
8963     }
8964     __ Add(out_high, first_high, value_high);
8965     return;
8966   }
8967   __ Adds(out_low, first_low, value_low);
8968   if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
8969     __ Adc(out_high, first_high, value_high);
8970   } else {
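    // SBC computes rn - op - (1 - carry); with op = ~value_high this is equivalent to
    // rn + value_high + carry, i.e. the same as ADC, but with an encodable immediate.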
8971     DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
8972     __ Sbc(out_high, first_high, ~value_high);
8973   }
8974 }
8975 
8976 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
8977   LocationSummary* locations = instruction->GetLocations();
8978   Location first = locations->InAt(0);
8979   Location second = locations->InAt(1);
8980   Location out = locations->Out();
8981 
8982   if (second.IsConstant()) {
8983     uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
8984     uint32_t value_low = Low32Bits(value);
8985     if (instruction->GetResultType() == DataType::Type::kInt32) {
8986       vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8987       vixl32::Register out_reg = OutputRegister(instruction);
8988       if (instruction->IsAnd()) {
8989         GenerateAndConst(out_reg, first_reg, value_low);
8990       } else if (instruction->IsOr()) {
8991         GenerateOrrConst(out_reg, first_reg, value_low);
8992       } else {
8993         DCHECK(instruction->IsXor());
8994         GenerateEorConst(out_reg, first_reg, value_low);
8995       }
8996     } else {
8997       DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8998       uint32_t value_high = High32Bits(value);
8999       vixl32::Register first_low = LowRegisterFrom(first);
9000       vixl32::Register first_high = HighRegisterFrom(first);
9001       vixl32::Register out_low = LowRegisterFrom(out);
9002       vixl32::Register out_high = HighRegisterFrom(out);
9003       if (instruction->IsAnd()) {
9004         GenerateAndConst(out_low, first_low, value_low);
9005         GenerateAndConst(out_high, first_high, value_high);
9006       } else if (instruction->IsOr()) {
9007         GenerateOrrConst(out_low, first_low, value_low);
9008         GenerateOrrConst(out_high, first_high, value_high);
9009       } else {
9010         DCHECK(instruction->IsXor());
9011         GenerateEorConst(out_low, first_low, value_low);
9012         GenerateEorConst(out_high, first_high, value_high);
9013       }
9014     }
9015     return;
9016   }
9017 
9018   if (instruction->GetResultType() == DataType::Type::kInt32) {
9019     vixl32::Register first_reg = InputRegisterAt(instruction, 0);
9020     vixl32::Register second_reg = InputRegisterAt(instruction, 1);
9021     vixl32::Register out_reg = OutputRegister(instruction);
9022     if (instruction->IsAnd()) {
9023       __ And(out_reg, first_reg, second_reg);
9024     } else if (instruction->IsOr()) {
9025       __ Orr(out_reg, first_reg, second_reg);
9026     } else {
9027       DCHECK(instruction->IsXor());
9028       __ Eor(out_reg, first_reg, second_reg);
9029     }
9030   } else {
9031     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
9032     vixl32::Register first_low = LowRegisterFrom(first);
9033     vixl32::Register first_high = HighRegisterFrom(first);
9034     vixl32::Register second_low = LowRegisterFrom(second);
9035     vixl32::Register second_high = HighRegisterFrom(second);
9036     vixl32::Register out_low = LowRegisterFrom(out);
9037     vixl32::Register out_high = HighRegisterFrom(out);
9038     if (instruction->IsAnd()) {
9039       __ And(out_low, first_low, second_low);
9040       __ And(out_high, first_high, second_high);
9041     } else if (instruction->IsOr()) {
9042       __ Orr(out_low, first_low, second_low);
9043       __ Orr(out_high, first_high, second_high);
9044     } else {
9045       DCHECK(instruction->IsXor());
9046       __ Eor(out_low, first_low, second_low);
9047       __ Eor(out_high, first_high, second_high);
9048     }
9049   }
9050 }
9051 
9052 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
9053     HInstruction* instruction,
9054     Location out,
9055     uint32_t offset,
9056     Location maybe_temp,
9057     ReadBarrierOption read_barrier_option) {
9058   vixl32::Register out_reg = RegisterFrom(out);
9059   if (read_barrier_option == kWithReadBarrier) {
9060     DCHECK(codegen_->EmitReadBarrier());
9061     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9062     if (kUseBakerReadBarrier) {
9063       // Load with fast path based Baker's read barrier.
9064       // /* HeapReference<Object> */ out = *(out + offset)
9065       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9066           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
9067     } else {
9068       // Load with slow path based read barrier.
9069       // Save the value of `out` into `maybe_temp` before overwriting it
9070       // in the following load operation, as we will need it for the
9071       // read barrier below.
9072       __ Mov(RegisterFrom(maybe_temp), out_reg);
9073       // /* HeapReference<Object> */ out = *(out + offset)
9074       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9075       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
9076     }
9077   } else {
9078     // Plain load with no read barrier.
9079     // /* HeapReference<Object> */ out = *(out + offset)
9080     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
9081     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9082   }
9083 }
9084 
9085 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
9086     HInstruction* instruction,
9087     Location out,
9088     Location obj,
9089     uint32_t offset,
9090     Location maybe_temp,
9091     ReadBarrierOption read_barrier_option) {
9092   vixl32::Register out_reg = RegisterFrom(out);
9093   vixl32::Register obj_reg = RegisterFrom(obj);
9094   if (read_barrier_option == kWithReadBarrier) {
9095     DCHECK(codegen_->EmitReadBarrier());
9096     if (kUseBakerReadBarrier) {
9097       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
9098       // Load with fast path based Baker's read barrier.
9099       // /* HeapReference<Object> */ out = *(obj + offset)
9100       codegen_->GenerateFieldLoadWithBakerReadBarrier(
9101           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
9102     } else {
9103       // Load with slow path based read barrier.
9104       // /* HeapReference<Object> */ out = *(obj + offset)
9105       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9106       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
9107     }
9108   } else {
9109     // Plain load with no read barrier.
9110     // /* HeapReference<Object> */ out = *(obj + offset)
9111     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
9112     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
9113   }
9114 }
9115 
9116 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
9117     HInstruction* instruction,
9118     Location root,
9119     vixl32::Register obj,
9120     uint32_t offset,
9121     ReadBarrierOption read_barrier_option) {
9122   vixl32::Register root_reg = RegisterFrom(root);
9123   if (read_barrier_option == kWithReadBarrier) {
9124     DCHECK(EmitReadBarrier());
9125     if (kUseBakerReadBarrier) {
9126       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
9127       // Baker's read barriers are used.
9128 
9129       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
9130       // the Marking Register) to decide whether we need to enter
9131       // the slow path to mark the GC root.
9132       //
9133       // We use shared thunks for the slow path; shared within the method
9134       // for JIT, across methods for AOT. That thunk checks the reference
9135       // and jumps to the entrypoint if needed.
9136       //
9137       //     lr = &return_address;
9138       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
9139       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9140       //       goto gc_root_thunk<root_reg>(lr)
9141       //     }
9142       //   return_address:
9143 
9144       UseScratchRegisterScope temps(GetVIXLAssembler());
9145       temps.Exclude(ip);
9146       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
9147       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
9148 
9149       size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
9150       size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
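      // The ADR and BNE are always 32-bit; the CMP is 16-bit when `mr` is a low
      // register and the LDR is 16-bit when `narrow`, hence the split above.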
9151       size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9152                           narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9153       ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9154       vixl32::Label return_address;
9155       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9156       __ cmp(mr, Operand(0));
9157       // Currently the offset is always within range. If that changes,
9158       // we shall have to split the load the same way as for fields.
9159       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
9160       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9161       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
9162       EmitBakerReadBarrierBne(custom_data);
9163       __ bind(&return_address);
9164       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9165                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9166                        : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
9167     } else {
9168       // GC root loaded through a slow path for read barriers other
9169       // than Baker's.
9170       // /* GcRoot<mirror::Object>* */ root = obj + offset
9171       __ Add(root_reg, obj, offset);
9172       // /* mirror::Object* */ root = root->Read()
9173       GenerateReadBarrierForRootSlow(instruction, root, root);
9174     }
9175   } else {
9176     // Plain GC root load with no read barrier.
9177     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
9178     GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
9179     // Note that GC roots are not affected by heap poisoning, thus we
9180     // do not have to unpoison `root_reg` here.
9181   }
9182   MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
9183 }
9184 
9185 void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
9186     vixl::aarch32::Register marked_old_value,
9187     vixl::aarch32::Register old_value) {
9188   DCHECK(EmitBakerReadBarrier());
9189 
9190   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
9191   // For low registers, we can reuse the GC root narrow entrypoint; for high registers
9192   // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
9193   bool narrow_mov = marked_old_value.IsLow();
9194   uint32_t custom_data = narrow_mov
9195       ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
9196       : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
9197 
9198   size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
9199   size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
9200   size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9201                       narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9202   ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9203   vixl32::Label return_address;
9204   EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9205   __ cmp(mr, Operand(0));
9206   ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9207   __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
9208   EmitBakerReadBarrierBne(custom_data);
9209   __ bind(&return_address);
9210   DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9211             narrow_mov
9212                 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
9213                 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
9214 }
9215 
9216 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9217                                                                  Location ref,
9218                                                                  vixl32::Register obj,
9219                                                                  const vixl32::MemOperand& src,
9220                                                                  bool needs_null_check) {
9221   DCHECK(EmitBakerReadBarrier());
9222 
9223   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9224   // Marking Register) to decide whether we need to enter the slow
9225   // path to mark the reference. Then, in the slow path, check the
9226   // gray bit in the lock word of the reference's holder (`obj`) to
9227   // decide whether to mark `ref` or not.
9228   //
9229   // We use shared thunks for the slow path; shared within the method
9230   // for JIT, across methods for AOT. That thunk checks the holder
9231   // and jumps to the entrypoint if needed. If the holder is not gray,
9232   // it creates a fake dependency and returns to the LDR instruction.
9233   //
9234   //     lr = &gray_return_address;
9235   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9236   //       goto field_thunk<holder_reg, base_reg>(lr)
9237   //     }
9238   //   not_gray_return_address:
9239   //     // Original reference load. If the offset is too large to fit
9240   //     // into LDR, we use an adjusted base register here.
9241   //     HeapReference<mirror::Object> reference = *(obj+offset);
9242   //   gray_return_address:
9243 
9244   DCHECK(src.GetAddrMode() == vixl32::Offset);
9245   DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
9246   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9247   bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
9248 
9249   UseScratchRegisterScope temps(GetVIXLAssembler());
9250   temps.Exclude(ip);
9251   uint32_t custom_data =
9252       EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
9253 
9254   {
9255     size_t narrow_instructions =
9256         /* CMP */ (mr.IsLow() ? 1u : 0u) +
9257         /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
9258     size_t wide_instructions =
9259         /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9260     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9261                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9262     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9263     vixl32::Label return_address;
9264     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9265     __ cmp(mr, Operand(0));
9266     EmitBakerReadBarrierBne(custom_data);
9267     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9268     __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
9269     if (needs_null_check) {
9270       MaybeRecordImplicitNullCheck(instruction);
9271     }
9272     // Note: We need a specific width for the unpoisoning NEG.
9273     if (kPoisonHeapReferences) {
9274       if (narrow) {
9275         // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
9276         __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
9277       } else {
9278         __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9279       }
9280     }
9281     __ bind(&return_address);
9282     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9283               narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
9284                      : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
9285   }
9286   MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
9287 }
9288 
9289 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
9290                                                                  Location ref,
9291                                                                  vixl32::Register obj,
9292                                                                  uint32_t offset,
9293                                                                  Location maybe_temp,
9294                                                                  bool needs_null_check) {
9295   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
9296   vixl32::Register base = obj;
9297   if (offset >= kReferenceLoadMinFarOffset) {
9298     base = RegisterFrom(maybe_temp);
9299     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
9300     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
9301     offset &= (kReferenceLoadMinFarOffset - 1u);
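    // The residual offset now fits a single LDR immediate; the thunk still identifies
    // the original holder (`obj`) through the custom data encoded for the read barrier.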
9302   }
9303   GenerateFieldLoadWithBakerReadBarrier(
9304       instruction, ref, obj, MemOperand(base, offset), needs_null_check);
9305 }
9306 
9307 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
9308                                                                  vixl32::Register obj,
9309                                                                  uint32_t data_offset,
9310                                                                  Location index,
9311                                                                  Location temp,
9312                                                                  bool needs_null_check) {
9313   DCHECK(EmitBakerReadBarrier());
9314 
9315   static_assert(
9316       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
9317       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
9318   ScaleFactor scale_factor = TIMES_4;
9319 
9320   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
9321   // Marking Register) to decide whether we need to enter the slow
9322   // path to mark the reference. Then, in the slow path, check the
9323   // gray bit in the lock word of the reference's holder (`obj`) to
9324   // decide whether to mark `ref` or not.
9325   //
9326   // We use shared thunks for the slow path; shared within the method
9327   // for JIT, across methods for AOT. That thunk checks the holder
9328   // and jumps to the entrypoint if needed. If the holder is not gray,
9329   // it creates a fake dependency and returns to the LDR instruction.
9330   //
9331   //     lr = &gray_return_address;
9332   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9333   //       goto array_thunk<base_reg>(lr)
9334   //     }
9335   //   not_gray_return_address:
9336   //     // Original reference load. If the offset is too large to fit
9337   //     // into LDR, we use an adjusted base register here.
9338   //     HeapReference<mirror::Object> reference = data[index];
9339   //   gray_return_address:
9340 
9341   DCHECK(index.IsValid());
9342   vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9343   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9344   vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
9345 
9346   UseScratchRegisterScope temps(GetVIXLAssembler());
9347   temps.Exclude(ip);
9348   uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9349 
9350   __ Add(data_reg, obj, Operand(data_offset));
9351   {
9352     size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9353     size_t wide_instructions =
9354         /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9355     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9356                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9357     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9358     vixl32::Label return_address;
9359     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9360     __ cmp(mr, Operand(0));
9361     EmitBakerReadBarrierBne(custom_data);
9362     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9363     __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9364     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
9365     // Note: We need a Wide NEG for the unpoisoning.
9366     if (kPoisonHeapReferences) {
9367       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9368     }
9369     __ bind(&return_address);
9370     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9371               BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9372   }
9373   MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9374 }
9375 
9376 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9377   // The following condition is a compile-time one, so it does not have a run-time cost.
9378   if (kIsDebugBuild && EmitBakerReadBarrier()) {
9379     // The following condition is a run-time one; it is executed after the
9380     // previous compile-time test, to avoid penalizing non-debug builds.
9381     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9382       UseScratchRegisterScope temps(GetVIXLAssembler());
9383       vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9384       GetAssembler()->GenerateMarkingRegisterCheck(temp,
9385                                                    kMarkingRegisterCheckBreakCodeBaseCode + code);
9386     }
9387   }
9388 }
9389 
9390 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9391                                                                   Location out,
9392                                                                   Location ref,
9393                                                                   Location obj,
9394                                                                   uint32_t offset,
9395                                                                   Location index) {
9396   SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9397       ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9398   AddSlowPath(slow_path);
9399   return slow_path;
9400 }
9401 
9402 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9403                                                    Location out,
9404                                                    Location ref,
9405                                                    Location obj,
9406                                                    uint32_t offset,
9407                                                    Location index) {
9408   DCHECK(EmitReadBarrier());
9409 
9410   // Insert a slow path based read barrier *after* the reference load.
9411   //
9412   // If heap poisoning is enabled, the unpoisoning of the loaded
9413   // reference will be carried out by the runtime within the slow
9414   // path.
9415   //
9416   // Note that `ref` currently does not get unpoisoned (when heap
9417   // poisoning is enabled), which is alright as the `ref` argument is
9418   // not used by the artReadBarrierSlow entry point.
9419   //
9420   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9421   SlowPathCodeARMVIXL* slow_path =
9422       AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9423 
9424   __ B(slow_path->GetEntryLabel());
9425   __ Bind(slow_path->GetExitLabel());
9426 }
9427 
9428 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9429                                                         Location out,
9430                                                         Location ref,
9431                                                         Location obj,
9432                                                         uint32_t offset,
9433                                                         Location index) {
9434   if (EmitReadBarrier()) {
9435     // Baker's read barriers shall be handled by the fast path
9436     // (CodeGeneratorARMVIXL::Generate{Field,Array}LoadWithBakerReadBarrier).
9437     DCHECK(!kUseBakerReadBarrier);
9438     // If heap poisoning is enabled, unpoisoning will be taken care of
9439     // by the runtime within the slow path.
9440     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9441   } else if (kPoisonHeapReferences) {
9442     GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9443   }
9444 }
9445 
9446 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9447                                                           Location out,
9448                                                           Location root) {
9449   DCHECK(EmitReadBarrier());
9450 
9451   // Insert a slow path based read barrier *after* the GC root load.
9452   //
9453   // Note that GC roots are not affected by heap poisoning, so we do
9454   // not need to do anything special for this here.
9455   SlowPathCodeARMVIXL* slow_path =
9456       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9457   AddSlowPath(slow_path);
9458 
9459   __ B(slow_path->GetEntryLabel());
9460   __ Bind(slow_path->GetExitLabel());
9461 }
9462 
9463 // Check if the desired_dispatch_info is supported. If it is, return it;
9464 // otherwise return a fall-back info that should be used instead.
9465 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9466     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9467     ArtMethod* method) {
9468   if (method->IsIntrinsic() &&
9469       desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9470     // As a work-around for soft-float native ABI interfering with type checks, we are
9471     // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9472     // when a float or double argument is passed in core registers but we cannot do that
9473     // for actual intrinsic implementations that expect them in FP registers. Therefore
9474     // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9475     // properly intrinsified, the dispatch type does not matter anyway.
9476     ScopedObjectAccess soa(Thread::Current());
9477     uint32_t shorty_len;
9478     const char* shorty = method->GetShorty(&shorty_len);
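    // shorty[0] is the return type, so the scan below starts at index 1 and only
    // inspects the parameter types.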
9479     for (uint32_t i = 1; i != shorty_len; ++i) {
9480       if (shorty[i] == 'D' || shorty[i] == 'F') {
9481         HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9482         dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9483         return dispatch_info;
9484       }
9485     }
9486   }
9487   return desired_dispatch_info;
9488 }
9489 
9490 
9491 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9492   switch (load_kind) {
9493     case MethodLoadKind::kBootImageLinkTimePcRelative: {
9494       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9495       PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9496       vixl32::Register temp_reg = RegisterFrom(temp);
9497       EmitMovwMovtPlaceholder(labels, temp_reg);
9498       break;
9499     }
9500     case MethodLoadKind::kBootImageRelRo: {
9501       uint32_t boot_image_offset = GetBootImageOffset(invoke);
9502       LoadBootImageRelRoEntry(RegisterFrom(temp), boot_image_offset);
9503       break;
9504     }
9505     case MethodLoadKind::kBssEntry: {
9506       PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9507       vixl32::Register temp_reg = RegisterFrom(temp);
9508       EmitMovwMovtPlaceholder(labels, temp_reg);
9509       // All aligned loads are implicitly atomic consume operations on ARM.
9510       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9511       break;
9512     }
9513     case MethodLoadKind::kJitDirectAddress: {
9514       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9515       break;
9516     }
9517     case MethodLoadKind::kRuntimeCall: {
9518       // Test situation, don't do anything.
9519       break;
9520     }
9521     default: {
9522       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9523       UNREACHABLE();
9524     }
9525   }
9526 }
9527 
9528 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9529     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9530   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
9531   switch (invoke->GetMethodLoadKind()) {
9532     case MethodLoadKind::kStringInit: {
9533       uint32_t offset =
9534           GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9535       // temp = thread->string_init_entrypoint
9536       GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9537       break;
9538     }
9539     case MethodLoadKind::kRecursive: {
9540       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9541       break;
9542     }
9543     case MethodLoadKind::kRuntimeCall: {
9544       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9545       return;  // No code pointer retrieval; the runtime performs the call directly.
9546     }
9547     case MethodLoadKind::kBootImageLinkTimePcRelative:
9548       // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9549       // pointer for kCallCriticalNative because it would not save us an instruction from
9550       // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9551       // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9552       FALLTHROUGH_INTENDED;
9553     default: {
9554       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9555       break;
9556     }
9557   }
9558 
9559   auto call_code_pointer_member = [&](MemberOffset offset) {
9560     // LR = callee_method->member;
9561     GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9562     {
9563       // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9564       // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
9565       ExactAssemblyScope aas(GetVIXLAssembler(),
9566                              vixl32::k16BitT32InstructionSizeInBytes,
9567                              CodeBufferCheckScope::kExactSize);
9568       // LR()
9569       __ blx(lr);
9570       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9571     }
9572   };
9573   switch (invoke->GetCodePtrLocation()) {
9574     case CodePtrLocation::kCallSelf:
9575       {
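        // kCallSelf is a direct recursive call: branch straight to this method's own
        // frame entry label.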
9576         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
9577         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9578         ExactAssemblyScope aas(GetVIXLAssembler(),
9579                                vixl32::k32BitT32InstructionSizeInBytes,
9580                                CodeBufferCheckScope::kMaximumSize);
9581         __ bl(GetFrameEntryLabel());
9582         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9583       }
9584       break;
9585     case CodePtrLocation::kCallCriticalNative: {
9586       size_t out_frame_size =
9587           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9588                                     kAapcsStackAlignment,
9589                                     GetCriticalNativeDirectCallFrameSize>(invoke);
9590       call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9591       // Move the result when needed due to native and managed ABI mismatch.
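      // Note: the critical native call here follows the native soft-float ABI, which returns
      // FP values in core registers (hence the VMOVs below), while managed code expects them
      // in s0/d0.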
9592       switch (invoke->GetType()) {
9593         case DataType::Type::kFloat32:
9594           __ Vmov(s0, r0);
9595           break;
9596         case DataType::Type::kFloat64:
9597           __ Vmov(d0, r0, r1);
9598           break;
9599         case DataType::Type::kBool:
9600         case DataType::Type::kInt8:
9601         case DataType::Type::kUint16:
9602         case DataType::Type::kInt16:
9603         case DataType::Type::kInt32:
9604         case DataType::Type::kInt64:
9605         case DataType::Type::kVoid:
9606           break;
9607         default:
9608           DCHECK(false) << invoke->GetType();
9609           break;
9610       }
9611       if (out_frame_size != 0u) {
9612         DecreaseFrame(out_frame_size);
9613       }
9614       break;
9615     }
9616     case CodePtrLocation::kCallArtMethod:
9617       call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9618       break;
9619   }
9620 
9621   DCHECK(!IsLeafMethod());
9622 }
9623 
9624 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9625     HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9626   vixl32::Register temp = RegisterFrom(temp_location);
9627   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9628       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9629 
9630   // Use the calling convention instead of the location of the receiver, as
9631   // intrinsics may have put the receiver in a different register. In the intrinsics
9632   // slow path, the arguments have been moved to the right place, so here we are
9633   // guaranteed that the receiver is the first register of the calling convention.
9634   InvokeDexCallingConventionARMVIXL calling_convention;
9635   vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9636   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9637   {
9638     // Make sure the pc is recorded immediately after the `ldr` instruction.
9639     ExactAssemblyScope aas(GetVIXLAssembler(),
9640                            vixl32::kMaxInstructionSizeInBytes,
9641                            CodeBufferCheckScope::kMaximumSize);
9642     // /* HeapReference<Class> */ temp = receiver->klass_
9643     __ ldr(temp, MemOperand(receiver, class_offset));
9644     MaybeRecordImplicitNullCheck(invoke);
9645   }
9646   // Instead of simply (possibly) unpoisoning `temp` here, we should
9647   // emit a read barrier for the previous class reference load.
9648   // However, this is not required in practice, as this is an
9649   // intermediate/temporary reference and because the current
9650   // concurrent copying collector keeps the from-space memory
9651   // intact/accessible until the end of the marking phase (the
9652   // concurrent copying collector may not do so in the future).
9653   GetAssembler()->MaybeUnpoisonHeapReference(temp);
9654 
9655   // If we're compiling baseline, update the inline cache.
9656   MaybeGenerateInlineCacheCheck(invoke, temp);
9657 
9658   // temp = temp->GetMethodAt(method_offset);
9659   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9660       kArmPointerSize).Int32Value();
9661   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9662   // LR = temp->GetEntryPoint();
9663   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9664   {
9665     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9666     // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
9667     ExactAssemblyScope aas(GetVIXLAssembler(),
9668                            vixl32::k16BitT32InstructionSizeInBytes,
9669                            CodeBufferCheckScope::kExactSize);
9670     // LR();
9671     __ blx(lr);
9672     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9673   }
9674 }
9675 
9676 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9677     uint32_t intrinsic_data) {
9678   return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9679 }
9680 
9681 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9682     uint32_t boot_image_offset) {
9683   return NewPcRelativePatch(/* dex_file= */ nullptr,
9684                             boot_image_offset,
9685                             &boot_image_other_patches_);
9686 }
9687 
9688 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9689     MethodReference target_method) {
9690   return NewPcRelativePatch(
9691       target_method.dex_file, target_method.index, &boot_image_method_patches_);
9692 }
9693 
9694 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9695     MethodReference target_method) {
9696   return NewPcRelativePatch(
9697       target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9698 }
9699 
9700 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9701     const DexFile& dex_file, dex::TypeIndex type_index) {
9702   return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9703 }
9704 
9705 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewAppImageTypePatch(
9706     const DexFile& dex_file, dex::TypeIndex type_index) {
9707   return NewPcRelativePatch(&dex_file, type_index.index_, &app_image_type_patches_);
9708 }
9709 
9710 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9711     HLoadClass* load_class) {
9712   const DexFile& dex_file = load_class->GetDexFile();
9713   dex::TypeIndex type_index = load_class->GetTypeIndex();
9714   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9715   switch (load_class->GetLoadKind()) {
9716     case HLoadClass::LoadKind::kBssEntry:
9717       patches = &type_bss_entry_patches_;
9718       break;
9719     case HLoadClass::LoadKind::kBssEntryPublic:
9720       patches = &public_type_bss_entry_patches_;
9721       break;
9722     case HLoadClass::LoadKind::kBssEntryPackage:
9723       patches = &package_type_bss_entry_patches_;
9724       break;
9725     default:
9726       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9727       UNREACHABLE();
9728   }
9729   return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9730 }
9731 
9732 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9733     const DexFile& dex_file, dex::StringIndex string_index) {
9734   return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9735 }
9736 
9737 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9738     const DexFile& dex_file, dex::StringIndex string_index) {
9739   return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9740 }
9741 
9742 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9743     const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9744   patches->emplace_back(dex_file, offset_or_index);
9745   return &patches->back();
9746 }
9747 
9748 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9749   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9750   DCHECK(!GetCompilerOptions().IsJitCompiler());
9751   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9752   vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9753   __ bind(bl_label);
9754   vixl32::Label placeholder_label;
9755   __ bl(&placeholder_label);  // Placeholder, patched at link-time.
9756   __ bind(&placeholder_label);
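  // Until the linker patches it, the BL above targets the very next instruction, since the
  // placeholder label is bound immediately after it.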
9757 }
9758 
9759 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9760   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9761   if (GetCompilerOptions().IsJitCompiler()) {
9762     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9763     vixl::aarch32::Label* slow_path_entry = &it->second.label;
9764     __ b(ne, EncodingSize(Wide), slow_path_entry);
9765   } else {
9766     baker_read_barrier_patches_.emplace_back(custom_data);
9767     vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9768     __ bind(patch_label);
9769     vixl32::Label placeholder_label;
9770     __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
9771     __ bind(&placeholder_label);
9772   }
9773 }
9774 
9775 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9776   return DeduplicateUint32Literal(address, &uint32_literals_);
9777 }
9778 
9779 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9780     const DexFile& dex_file,
9781     dex::StringIndex string_index,
9782     Handle<mirror::String> handle) {
9783   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9784   return jit_string_patches_.GetOrCreate(
9785       StringReference(&dex_file, string_index),
9786       [this]() {
9787         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9788       });
9789 }
9790 
9791 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9792                                                       dex::TypeIndex type_index,
9793                                                       Handle<mirror::Class> handle) {
9794   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9795   return jit_class_patches_.GetOrCreate(
9796       TypeReference(&dex_file, type_index),
9797       [this]() {
9798         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9799       });
9800 }
9801 
9802 void CodeGeneratorARMVIXL::LoadBootImageRelRoEntry(vixl32::Register reg,
9803                                                    uint32_t boot_image_offset) {
9804   CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9805   EmitMovwMovtPlaceholder(labels, reg);
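  // The MOVW+MOVT+ADD placeholder produces the address of a boot image rel.ro entry;
  // the load below reads the actual boot image reference from that entry.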
9806   __ Ldr(reg, MemOperand(reg, /*offset=*/ 0));
9807 }
9808 
9809 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9810                                                 uint32_t boot_image_reference) {
9811   if (GetCompilerOptions().IsBootImage()) {
9812     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9813         NewBootImageIntrinsicPatch(boot_image_reference);
9814     EmitMovwMovtPlaceholder(labels, reg);
9815   } else if (GetCompilerOptions().GetCompilePic()) {
9816     LoadBootImageRelRoEntry(reg, boot_image_reference);
9817   } else {
9818     DCHECK(GetCompilerOptions().IsJitCompiler());
9819     gc::Heap* heap = Runtime::Current()->GetHeap();
9820     DCHECK(!heap->GetBootImageSpaces().empty());
9821     uintptr_t address =
9822         reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9823     __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9824   }
9825 }
9826 
9827 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9828                                                          TypeReference target_type) {
9829   // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9830   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9831   PcRelativePatchInfo* labels =
9832       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9833   EmitMovwMovtPlaceholder(labels, reg);
9834 }
9835 
9836 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9837   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9838   if (GetCompilerOptions().IsBootImage()) {
9839     MethodReference target_method = invoke->GetResolvedMethodReference();
9840     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9841     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9842   } else {
9843     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9844     LoadBootImageAddress(reg, boot_image_offset);
9845   }
9846 }
9847 
9848 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9849                                                      ClassRoot class_root) {
9850   if (GetCompilerOptions().IsBootImage()) {
9851     ScopedObjectAccess soa(Thread::Current());
9852     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9853     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9854     LoadTypeForBootImageIntrinsic(reg, target_type);
9855   } else {
9856     uint32_t boot_image_offset = GetBootImageOffset(class_root);
9857     LoadBootImageAddress(reg, boot_image_offset);
9858   }
9859 }
9860 
9861 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
9862 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9863     const ArenaDeque<PcRelativePatchInfo>& infos,
9864     ArenaVector<linker::LinkerPatch>* linker_patches) {
9865   for (const PcRelativePatchInfo& info : infos) {
9866     const DexFile* dex_file = info.target_dex_file;
9867     size_t offset_or_index = info.offset_or_index;
9868     DCHECK(info.add_pc_label.IsBound());
9869     uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9870     // Add MOVW patch.
9871     DCHECK(info.movw_label.IsBound());
9872     uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9873     linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9874     // Add MOVT patch.
9875     DCHECK(info.movt_label.IsBound());
9876     uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9877     linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9878   }
9879 }
9880 
9881 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
9882 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9883                                      const DexFile* target_dex_file,
9884                                      uint32_t pc_insn_offset,
9885                                      uint32_t boot_image_offset) {
9886   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
9887   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9888 }
9889 
9890 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9891   DCHECK(linker_patches->empty());
9892   size_t size =
9893       /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9894       /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9895       /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9896       /* MOVW+MOVT for each entry */ 2u * app_image_type_patches_.size() +
9897       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9898       /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9899       /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9900       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9901       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9902       /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9903       call_entrypoint_patches_.size() +
9904       baker_read_barrier_patches_.size();
9905   linker_patches->reserve(size);
9906   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9907     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9908         boot_image_method_patches_, linker_patches);
9909     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9910         boot_image_type_patches_, linker_patches);
9911     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9912         boot_image_string_patches_, linker_patches);
9913   } else {
9914     DCHECK(boot_image_method_patches_.empty());
9915     DCHECK(boot_image_type_patches_.empty());
9916     DCHECK(boot_image_string_patches_.empty());
9917   }
9918   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
9919   if (GetCompilerOptions().IsBootImage()) {
9920     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9921         boot_image_other_patches_, linker_patches);
9922   } else {
9923     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
9924         boot_image_other_patches_, linker_patches);
9925     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
9926         app_image_type_patches_, linker_patches);
9927   }
9928   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9929       method_bss_entry_patches_, linker_patches);
9930   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9931       type_bss_entry_patches_, linker_patches);
9932   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
9933       public_type_bss_entry_patches_, linker_patches);
9934   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
9935       package_type_bss_entry_patches_, linker_patches);
9936   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9937       string_bss_entry_patches_, linker_patches);
9938   for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9939     DCHECK(info.target_dex_file == nullptr);
9940     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9941         info.label.GetLocation(), info.offset_or_index));
9942   }
9943   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9944     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
9945         info.label.GetLocation(), info.custom_data));
9946   }
9947   DCHECK_EQ(size, linker_patches->size());
9948 }
9949 
9950 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
9951   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
9952          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
9953          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
9954 }
9955 
9956 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
9957                                          /*out*/ ArenaVector<uint8_t>* code,
9958                                          /*out*/ std::string* debug_name) {
9959   arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
9960   switch (patch.GetType()) {
9961     case linker::LinkerPatch::Type::kCallRelative: {
9962       // The thunk just uses the entry point in the ArtMethod. This works even for calls
9963       // to the generic JNI and interpreter trampolines.
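      // The managed calling convention passes the target ArtMethod* in r0, so loading pc
      // from its entry point field tail-calls the target's code.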
9964       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
9965       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
9966       assembler.GetVIXLAssembler()->Bkpt(0);
9967       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9968         *debug_name = "MethodCallThunk";
9969       }
9970       break;
9971     }
9972     case linker::LinkerPatch::Type::kCallEntrypoint: {
9973       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
9974       assembler.GetVIXLAssembler()->Bkpt(0);
9975       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9976         *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
9977       }
9978       break;
9979     }
9980     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
9981       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
9982       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
9983       break;
9984     }
9985     default:
9986       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
9987       UNREACHABLE();
9988   }
9989 
9990   // Ensure we emit the literal pool if any.
9991   assembler.FinalizeCode();
9992   code->resize(assembler.CodeSize());
9993   MemoryRegion code_region(code->data(), code->size());
9994   assembler.CopyInstructions(code_region);
9995 }
9996 
9997 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
9998     uint32_t value,
9999     Uint32ToLiteralMap* map) {
10000   return map->GetOrCreate(
10001       value,
10002       [this, value]() {
10003         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
10004       });
10005 }
10006 
10007 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10008   LocationSummary* locations =
10009       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
10010   locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
10011                      Location::RequiresRegister());
10012   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
10013   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
10014   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
10015 }
10016 
10017 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
10018   vixl32::Register res = OutputRegister(instr);
10019   vixl32::Register accumulator =
10020       InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
10021   vixl32::Register mul_left =
10022       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
10023   vixl32::Register mul_right =
10024       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
10025 
10026   if (instr->GetOpKind() == HInstruction::kAdd) {
10027     __ Mla(res, mul_left, mul_right, accumulator);
10028   } else {
10029     __ Mls(res, mul_left, mul_right, accumulator);
10030   }
10031 }
10032 
10033 void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10034   // Nothing to do, this should be removed during prepare for register allocator.
10035   LOG(FATAL) << "Unreachable";
10036 }
10037 
10038 void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
10039   // Nothing to do, this should be removed during prepare for register allocator.
10040   LOG(FATAL) << "Unreachable";
10041 }
10042 
10043 // Simple implementation of packed switch - generate cascaded compare/jumps.
10044 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10045   LocationSummary* locations =
10046       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
10047   locations->SetInAt(0, Location::RequiresRegister());
10048   if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
10049       codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10050     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
10051     if (switch_instr->GetStartValue() != 0) {
10052       locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
10053     }
10054   }
10055 }
10056 
10057 // TODO(VIXL): Investigate and reach the parity with old arm codegen.
10058 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
10059   int32_t lower_bound = switch_instr->GetStartValue();
10060   uint32_t num_entries = switch_instr->GetNumEntries();
10061   LocationSummary* locations = switch_instr->GetLocations();
10062   vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
10063   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
10064 
10065   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
10066       !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
10067     // Create a series of compare/jumps.
10068     UseScratchRegisterScope temps(GetVIXLAssembler());
10069     vixl32::Register temp_reg = temps.Acquire();
10070     // Note: It is fine for the AddConstantSetFlags() below to use the IP register to
10071     // temporarily store the immediate, because IP is used as the destination register.
10072     // For the other AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the
10073     // immediate values are constant and can be encoded in the instruction without using IP.
10074     __ Adds(temp_reg, value_reg, -lower_bound);
10075 
10076     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
10077     // Jump to successors[0] if value == lower_bound.
10078     __ B(eq, codegen_->GetLabelOf(successors[0]));
10079     int32_t last_index = 0;
10080     for (; num_entries - last_index > 2; last_index += 2) {
10081       __ Adds(temp_reg, temp_reg, -2);
10082       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
10083       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
10084       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
10085       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
10086     }
10087     if (num_entries - last_index == 2) {
10088       // The last missing case_value.
10089       __ Cmp(temp_reg, 1);
10090       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
10091     }
10092 
10093     // And the default for any other value.
10094     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
10095       __ B(codegen_->GetLabelOf(default_block));
10096     }
10097   } else {
10098     // Create a table lookup.
10099     vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
10100 
10101     JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
10102 
10103     // Remove the bias.
10104     vixl32::Register key_reg;
10105     if (lower_bound != 0) {
10106       key_reg = RegisterFrom(locations->GetTemp(1));
10107       __ Sub(key_reg, value_reg, lower_bound);
10108     } else {
10109       key_reg = value_reg;
10110     }
10111 
10112     // Check whether the value is in the table, jump to default block if not.
10113     __ Cmp(key_reg, num_entries - 1);
10114     __ B(hi, codegen_->GetLabelOf(default_block));
10115 
10116     UseScratchRegisterScope temps(GetVIXLAssembler());
10117     vixl32::Register jump_offset = temps.Acquire();
10118 
10119     // Load jump offset from the table.
10120     {
10121       const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
10122       ExactAssemblyScope aas(GetVIXLAssembler(),
10123                              (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
10124                              CodeBufferCheckScope::kMaximumSize);
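      // The scope size accounts for the four instructions below plus the jump table that is
      // emitted inline at the end of the block.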
10125       __ adr(table_base, jump_table->GetTableStartLabel());
10126       __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
10127 
10128       // Jump to the target block by branching to table_base (PC-relative) + offset.
10129       vixl32::Register target_address = table_base;
10130       __ add(target_address, table_base, jump_offset);
10131       __ bx(target_address);
10132 
10133       jump_table->EmitTable(codegen_);
10134     }
10135   }
10136 }
10137 
10138 // Copy the result of a call into the given target.
10139 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
10140   if (!trg.IsValid()) {
10141     DCHECK_EQ(type, DataType::Type::kVoid);
10142     return;
10143   }
10144 
10145   DCHECK_NE(type, DataType::Type::kVoid);
10146 
10147   Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
10148   if (return_loc.Equals(trg)) {
10149     return;
10150   }
10151 
10152   // Let the parallel move resolver take care of all of this.
10153   HParallelMove parallel_move(GetGraph()->GetAllocator());
10154   parallel_move.AddMove(return_loc, trg, type, nullptr);
10155   GetMoveResolver()->EmitNativeCode(&parallel_move);
10156 }
10157 
10158 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10159   LocationSummary* locations =
10160       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
10161   locations->SetInAt(0, Location::RequiresRegister());
10162   locations->SetOut(Location::RequiresRegister());
10163 }
10164 
10165 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
10166   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
10167     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
10168         instruction->GetIndex(), kArmPointerSize).SizeValue();
10169     GetAssembler()->LoadFromOffset(kLoadWord,
10170                                    OutputRegister(instruction),
10171                                    InputRegisterAt(instruction, 0),
10172                                    method_offset);
10173   } else {
10174     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
10175         instruction->GetIndex(), kArmPointerSize));
10176     GetAssembler()->LoadFromOffset(kLoadWord,
10177                                    OutputRegister(instruction),
10178                                    InputRegisterAt(instruction, 0),
10179                                    mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
10180     GetAssembler()->LoadFromOffset(kLoadWord,
10181                                    OutputRegister(instruction),
10182                                    OutputRegister(instruction),
10183                                    method_offset);
10184   }
10185 }
10186 
10187 static void PatchJitRootUse(uint8_t* code,
10188                             const uint8_t* roots_data,
10189                             VIXLUInt32Literal* literal,
10190                             uint64_t index_in_table) {
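  // Overwrite the placeholder literal in the emitted code with the address of this root's
  // slot in the JIT GC roots table; the generated code loads the root through that address.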
10191   DCHECK(literal->IsBound());
10192   uint32_t literal_offset = literal->GetLocation();
10193   uintptr_t address =
10194       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
10195   uint8_t* data = code + literal_offset;
10196   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
10197 }
10198 
10199 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
10200   for (const auto& entry : jit_string_patches_) {
10201     const StringReference& string_reference = entry.first;
10202     VIXLUInt32Literal* table_entry_literal = entry.second;
10203     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
10204     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10205   }
10206   for (const auto& entry : jit_class_patches_) {
10207     const TypeReference& type_reference = entry.first;
10208     VIXLUInt32Literal* table_entry_literal = entry.second;
10209     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
10210     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
10211   }
10212 }
10213 
10214 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
10215     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
10216     vixl32::Register out) {
10217   ExactAssemblyScope aas(GetVIXLAssembler(),
10218                          3 * vixl32::kMaxInstructionSizeInBytes,
10219                          CodeBufferCheckScope::kMaximumSize);
10220   // TODO(VIXL): Think about using mov instead of movw.
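  // The linker patches the MOVW/MOVT pair with the low and high halves of the offset from
  // the PC value read by the ADD below, so `out` ends up holding a PC-relative address.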
10221   __ bind(&labels->movw_label);
10222   __ movw(out, /* operand= */ 0u);
10223   __ bind(&labels->movt_label);
10224   __ movt(out, /* operand= */ 0u);
10225   __ bind(&labels->add_pc_label);
10226   __ add(out, out, pc);
10227 }
10228 
10229 #undef __
10230 #undef QUICK_ENTRY_POINT
10231 #undef TODO_VIXL32
10232 
10233 #define __ assembler.GetVIXLAssembler()->
10234 
10235 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
10236                                      vixl32::Register base_reg,
10237                                      vixl32::MemOperand& lock_word,
10238                                      vixl32::Label* slow_path,
10239                                      int32_t raw_ldr_offset,
10240                                      vixl32::Label* throw_npe = nullptr) {
10241   // Load the lock word containing the rb_state.
10242   __ Ldr(ip, lock_word);
10243   // Given the numeric representation, it's enough to check the low bit of the rb_state.
10244   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
10245   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
10246   __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
10247   __ B(ne, slow_path, /* is_far_target= */ false);
10248   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
10249   if (throw_npe != nullptr) {
10250     __ Bind(throw_npe);
10251   }
10252   __ Add(lr, lr, raw_ldr_offset);
10253   // Introduce a dependency on the lock_word including rb_state,
10254   // to prevent load-load reordering, and without using
10255   // a memory barrier (which would be more expensive).
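  // (An LSR by 32 yields zero, so the ADD below leaves `base_reg` unchanged; only the
  // register dependency on `ip` matters.)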
10256   __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
10257   __ Bx(lr);          // And return back to the function.
10258   // Note: The fake dependency is unnecessary for the slow path.
10259 }
10260 
10261 // Load the read barrier introspection entrypoint into register `entrypoint`.
10262 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
10263   // The register where the read barrier introspection entrypoint is loaded
10264   // is the marking register. We clobber it here and the entrypoint restores it to 1.
10265   vixl32::Register entrypoint = mr;
10266   // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
10267   DCHECK_EQ(ip.GetCode(), 12u);
10268   const int32_t entry_point_offset =
10269       Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
10270   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
10271   return entrypoint;
10272 }
10273 
10274 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
10275                                                         uint32_t encoded_data,
10276                                                         /*out*/ std::string* debug_name) {
10277   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
10278   switch (kind) {
10279     case BakerReadBarrierKind::kField: {
10280       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10281       CheckValidReg(base_reg.GetCode());
10282       vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
10283       CheckValidReg(holder_reg.GetCode());
10284       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10285       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10286       temps.Exclude(ip);
10287       // In the case of a field load, if `base_reg` differs from
10288       // `holder_reg`, the offset was too large and we must have emitted (during the construction
10289       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
10290       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
10291       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
10292       // not necessarily do that check before going to the thunk.
10293       vixl32::Label throw_npe_label;
10294       vixl32::Label* throw_npe = nullptr;
10295       if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
10296         throw_npe = &throw_npe_label;
10297         __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
10298       }
10299       // Check if the holder is gray and, if not, add fake dependency to the base register
10300       // and return to the LDR instruction to load the reference. Otherwise, use introspection
10301       // to load the reference and call the entrypoint that performs further checks on the
10302       // reference and marks it if needed.
10303       vixl32::Label slow_path;
10304       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
10305       const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
10306           ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
10307           : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
10308       EmitGrayCheckAndFastPath(
10309           assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
10310       __ Bind(&slow_path);
10311       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10312                                  raw_ldr_offset;
10313       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10314       if (width == BakerReadBarrierWidth::kWide) {
10315         MemOperand ldr_half_address(lr, ldr_offset + 2);
10316         __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
10317         __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
10318         __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
10319       } else {
10320         MemOperand ldr_address(lr, ldr_offset);
10321         __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
10322         __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
10323                ep_reg,                        // for narrow LDR.
10324                Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
10325         __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
10326         __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
10327       }
10328       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
10329       __ Bx(ep_reg);                          // Jump to the entrypoint.
10330       break;
10331     }
10332     case BakerReadBarrierKind::kArray: {
10333       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10334       CheckValidReg(base_reg.GetCode());
10335       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10336                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10337       DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10338       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10339       temps.Exclude(ip);
10340       vixl32::Label slow_path;
10341       int32_t data_offset =
10342           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10343       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10344       DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10345       const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10346       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10347       __ Bind(&slow_path);
10348       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10349                                  raw_ldr_offset;
10350       MemOperand ldr_address(lr, ldr_offset + 2);
10351       __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
10352                                               // i.e. Rm+32 because the scale in imm2 is 2.
10353       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10354       __ Bfi(ep_reg, ip, 3, 6);               // Insert ip to the entrypoint address to create
10355                                               // a switch case target based on the index register.
10356       __ Mov(ip, base_reg);                   // Move the base register to ip.
10357       __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
10358       break;
10359     }
10360     case BakerReadBarrierKind::kGcRoot:
10361     case BakerReadBarrierKind::kIntrinsicCas: {
10362       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10363       // and it does not have a forwarding address), call the correct introspection entrypoint;
10364       // otherwise return the reference (or the extracted forwarding address).
10365       // There is no gray bit check for GC roots.
10366       vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10367       CheckValidReg(root_reg.GetCode());
10368       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10369                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10370       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10371       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10372       temps.Exclude(ip);
10373       vixl32::Label return_label, not_marked, forwarding_address;
10374       __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10375       MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10376       __ Ldr(ip, lock_word);
10377       __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10378       __ B(eq, &not_marked);
10379       __ Bind(&return_label);
10380       __ Bx(lr);
10381       __ Bind(&not_marked);
10382       static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10383                     "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10384                     "the highest bits and the 'forwarding address' state to have all bits set");
10385       __ Cmp(ip, Operand(0xc0000000));
10386       __ B(hs, &forwarding_address);
10387       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10388       // Adjust the art_quick_read_barrier_mark_introspection address
10389       // in kBakerCcEntrypointRegister to one of
10390       //     art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10391       if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10392         DCHECK(width == BakerReadBarrierWidth::kWide);
10393         DCHECK(!root_reg.IsLow());
10394       }
10395       int32_t entrypoint_offset =
10396           (kind == BakerReadBarrierKind::kGcRoot)
10397               ? (width == BakerReadBarrierWidth::kWide)
10398                   ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10399                   : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10400               : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10401       __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10402       __ Mov(ip, root_reg);
10403       __ Bx(ep_reg);
10404       __ Bind(&forwarding_address);
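      // Shifting the lock word left by kForwardingAddressShift drops the state bits and
      // reconstructs the aligned forwarding address.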
10405       __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10406       __ Bx(lr);
10407       break;
10408     }
10409     default:
10410       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10411       UNREACHABLE();
10412   }
10413 
10414   // For JIT, the slow path is considered part of the compiled method,
10415   // so JIT should pass null as `debug_name`.
10416   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
10417   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10418     std::ostringstream oss;
10419     oss << "BakerReadBarrierThunk";
10420     switch (kind) {
10421       case BakerReadBarrierKind::kField:
10422         oss << "Field";
10423         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10424           oss << "Wide";
10425         }
10426         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10427             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10428         break;
10429       case BakerReadBarrierKind::kArray:
10430         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10431         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10432                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10433         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10434         break;
10435       case BakerReadBarrierKind::kGcRoot:
10436         oss << "GcRoot";
10437         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10438           oss << "Wide";
10439         }
10440         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10441         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10442                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10443         break;
10444       case BakerReadBarrierKind::kIntrinsicCas:
10445         oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10446         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10447                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10448         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10449         break;
10450     }
10451     *debug_name = oss.str();
10452   }
10453 }
10454 
10455 #undef __
10456 
10457 }  // namespace arm
10458 }  // namespace art
10459