/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm_vixl.h"

#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm/jni_frame_arm.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

namespace art {
namespace arm {

namespace vixl32 = vixl::aarch32;
using namespace vixl32;  // NOLINT(build/namespaces)

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegister;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::Uint64ConstantFrom;

using vixl::EmissionCheckScope;
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

using RegisterList = vixl32::RegisterList;

static bool ExpectedPairLayout(Location location) {
  // We expect this for both core and fpu register pairs.
  return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
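// For example (illustrative), an offset of 0x1008 would then be split into an ADD of the
// 4KiB-aligned part (0x1000) to the base register followed by an LDR with the remaining
// offset (0x8).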
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;

// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;

#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()

// Marker for code that is yet to be, and must be, implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "

static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
  return rt.IsLow() && rn.IsLow() && offset < 32u;
}

class EmitAdrCode {
 public:
  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
      : assembler_(assembler), rd_(rd), label_(label) {
    DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
    adr_location_ = assembler->GetCursorOffset();
    assembler->adr(EncodingSize(Wide), rd, label);
  }

  ~EmitAdrCode() {
    DCHECK(label_->IsBound());
    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
    // TODO: Maybe extend VIXL to allow ADR for return address?
    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);           // Check bits 24-31, except 26.
    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);           // Check bits 16-23.
    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());   // Check bits 8-11 and 15.
    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);           // Check bit 0, i.e. the `offset & 1`.
    // Add the Thumb mode bit.
    raw_adr[2] |= 0x01u;
  }

 private:
  ArmVIXLMacroAssembler* const assembler_;
  vixl32::Register rd_;
  vixl32::Label* const label_;
  int32_t adr_location_;
};

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers:
// for each live D register they treat the two corresponding S registers as live.
//
// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
// a list of contiguous D registers from a list of contiguous S registers (handling the first/last
// S register corner cases) and save/restore this new list as D registers. This has two benefits:
// - decreasing code size,
// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
//   restored and then used as a D register in regular non-slow-path code.
//
// For the following example (v means the S register is live):
//   D names: |    D0   |    D1   |    D2   |    D3   | ...
//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
//
// S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
// as D registers.
//
// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
// for lists of floating-point registers.
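// In the example above the resulting call is SaveContiguousSRegisterList(1, 6, ...): S1 is
// stored on its own, S2..S5 are stored as the D register list (D1, D2) via VSTM, and S6 is
// stored on its own last.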
static size_t SaveContiguousSRegisterList(size_t first,
                                          size_t last,
                                          CodeGenerator* codegen,
                                          size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool save_last = false;
  if (last % 2 == 0) {
    save_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;

    if (number_of_d_regs == 1) {
      __ Vstr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (save_last) {
    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static size_t RestoreContiguousSRegisterList(size_t first,
                                             size_t last,
                                             CodeGenerator* codegen,
                                             size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool restore_last = false;
  if (last % 2 == 0) {
    restore_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;
    if (number_of_d_regs == 1) {
      __ Vldr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (restore_last) {
    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}

static LoadOperandType GetLoadOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kLoadWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      return kLoadUnsignedByte;
    case DataType::Type::kInt8:
      return kLoadSignedByte;
    case DataType::Type::kUint16:
      return kLoadUnsignedHalfword;
    case DataType::Type::kInt16:
      return kLoadSignedHalfword;
    case DataType::Type::kInt32:
      return kLoadWord;
    case DataType::Type::kInt64:
      return kLoadWordPair;
    case DataType::Type::kFloat32:
      return kLoadSWord;
    case DataType::Type::kFloat64:
      return kLoadDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

static StoreOperandType GetStoreOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
      return kStoreWord;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      return kStoreByte;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      return kStoreHalfword;
    case DataType::Type::kInt32:
      return kStoreWord;
    case DataType::Type::kInt64:
      return kStoreWordPair;
    case DataType::Type::kFloat32:
      return kStoreSWord;
    case DataType::Type::kFloat64:
      return kStoreDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  orig_offset = stack_offset;
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  stack_offset = orig_offset;
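  // Process each contiguous run of set bits in `fp_spills` as one S register range. For example
  // (illustrative), fp_spills == 0b01111100 gives begin == 2 and tmp == 0b10000000, so end == 7
  // and S2..S6 are saved together; ANDing with `tmp` then clears the processed run.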
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    stack_offset += kArmWordSize;
  }

  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
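  // As in SaveLiveRegisters(), process each contiguous run of set bits in `fp_spills` as one
  // S register range and restore it as a D register list where possible.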
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}

class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
};

class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
};

class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARMVIXL(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm_codegen->GetLabelOf(successor_));
    }
  }

  vixl32::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl32::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
};

class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
};

class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
      : SlowPathCodeARMVIXL(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};

class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(instruction_->IsLoadString());
    DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
    arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();

    arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
};

class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;

    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
};

class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
      : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));

    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
};

class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
                                             Location out,
                                             Location ref,
                                             Location obj,
                                             uint32_t offset,
                                             Location index)
      : SlowPathCodeARMVIXL(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ LoadFromOffset(kLoadWord, out, out, offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        vixl32::Register index_reg = RegisterFrom(index_);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
        if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
          // art::arm::ArmVIXLMacroAssembler::Add below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg, index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, TIMES_4);
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, offset_);
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
        DCHECK(intrinsic == Intrinsics::kUnsafeGetObject ||
               intrinsic == Intrinsics::kUnsafeGetObjectVolatile ||
               mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
                   mirror::VarHandle::AccessModeTemplate::kGet ||
               mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
                   mirror::VarHandle::AccessModeTemplate::kCompareAndSet ||
               mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
                   mirror::VarHandle::AccessModeTemplate::kCompareAndExchange ||
               mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
                   mirror::VarHandle::AccessModeTemplate::kGetAndUpdate)
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        // Though UnsafeGet's offset location is a register pair, we only pass the low
        // part (high part is irrelevant for 32-bit addresses) to the slow path.
        // For VarHandle intrinsics, the index is always just a register.
        DCHECK(index_.IsRegister());
        index = index_;
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ Mov(calling_convention.GetRegisterAt(2), offset_);
    }
    arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
  }

 private:
  vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    uint32_t ref = RegisterFrom(ref_).GetCode();
    uint32_t obj = RegisterFrom(obj_).GetCode();
    for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return vixl32::Register(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
    arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
};

inline vixl32::Condition ARMCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition.
inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    // Signed to unsigned.
    case kCondLT: return lo;
    case kCondLE: return ls;
    case kCondGT: return hi;
    case kCondGE: return hs;
    // Unsigned conditions remain unchanged.
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
  // The ARM condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
  switch (op_kind) {
    case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
    case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
    case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
    default:
      LOG(FATAL) << "Unexpected op kind " << op_kind;
      UNREACHABLE();
  }
}

void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << vixl32::Register(reg);
}

void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << vixl32::SRegister(reg);
}

const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
}

static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
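  // Build a mask with one bit set per S register in the inclusive range, e.g. (illustrative)
  // a list covering S2..S4 yields 0b11100.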
  uint32_t mask = 0;
  for (uint32_t i = regs.GetFirstSRegister().GetCode();
       i <= regs.GetLastSRegister().GetCode();
       ++i) {
    mask |= (1 << i);
  }
  return mask;
}

// Saves the register on the stack. Returns the size taken on the stack.
size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                              uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

// Restores the register from the stack. Returns the size taken on the stack.
size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                 uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                       uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                          uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}

static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
                                        vixl32::Register out,
                                        vixl32::Register first,
                                        const Operand& second,
                                        CodeGeneratorARMVIXL* codegen) {
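  // With a zero right-hand operand, AND produces zero while ADD, ORR, SUB and EOR all produce
  // `first`, so a single MOV covers every kind.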
  if (second.IsImmediate() && second.GetImmediate() == 0) {
    const Operand in = kind == HInstruction::kAnd
        ? Operand(0)
        : Operand(first);

    __ Mov(out, in);
  } else {
    switch (kind) {
      case HInstruction::kAdd:
        __ Add(out, first, second);
        break;
      case HInstruction::kAnd:
        __ And(out, first, second);
        break;
      case HInstruction::kOr:
        __ Orr(out, first, second);
        break;
      case HInstruction::kSub:
        __ Sub(out, first, second);
        break;
      case HInstruction::kXor:
        __ Eor(out, first, second);
        break;
      default:
        LOG(FATAL) << "Unexpected instruction kind: " << kind;
        UNREACHABLE();
    }
  }
}

static void GenerateDataProc(HInstruction::InstructionKind kind,
                             const Location& out,
                             const Location& first,
                             const Operand& second_lo,
                             const Operand& second_hi,
                             CodeGeneratorARMVIXL* codegen) {
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  const vixl32::Register out_lo = LowRegisterFrom(out);

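  // Additions and subtractions must propagate the carry/borrow from the low word into the high
  // word (ADDS/ADC, SUBS/SBC); the logical operations can be applied to each half independently.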
  if (kind == HInstruction::kAdd) {
    __ Adds(out_lo, first_lo, second_lo);
    __ Adc(out_hi, first_hi, second_hi);
  } else if (kind == HInstruction::kSub) {
    __ Subs(out_lo, first_lo, second_lo);
    __ Sbc(out_hi, first_hi, second_hi);
  } else {
    GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
    GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
  }
}

static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
  return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
}

static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
                                 CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const LocationSummary* const locations = instruction->GetLocations();
  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  const Location first = locations->InAt(0);
  const Location second = locations->InAt(1);
  const Location out = locations->Out();
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  const vixl32::Register out_lo = LowRegisterFrom(out);
  const vixl32::Register second_hi = HighRegisterFrom(second);
  const vixl32::Register second_lo = LowRegisterFrom(second);
  const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());

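  // For shift amounts of 32 or more, one input word of the shifted operand crosses entirely into
  // the other half (and the vacated half is filled with zeros or sign bits). For example
  // (illustrative), an LSL by 40 yields out_hi = first_hi op (second_lo << 8) and
  // out_lo = first_lo op 0.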
1166   if (shift_value >= 32) {
1167     if (shift == ShiftType::LSL) {
1168       GenerateDataProcInstruction(kind,
1169                                   out_hi,
1170                                   first_hi,
1171                                   Operand(second_lo, ShiftType::LSL, shift_value - 32),
1172                                   codegen);
1173       GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1174     } else if (shift == ShiftType::ASR) {
1175       GenerateDataProc(kind,
1176                        out,
1177                        first,
1178                        GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1179                        Operand(second_hi, ShiftType::ASR, 31),
1180                        codegen);
1181     } else {
1182       DCHECK_EQ(shift, ShiftType::LSR);
1183       GenerateDataProc(kind,
1184                        out,
1185                        first,
1186                        GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1187                        0,
1188                        codegen);
1189     }
1190   } else {
1191     DCHECK_GT(shift_value, 1U);
1192     DCHECK_LT(shift_value, 32U);
1193 
1194     UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1195 
1196     if (shift == ShiftType::LSL) {
1197       // We are not doing this for HInstruction::kAdd because the output will require
1198       // Location::kOutputOverlap; not applicable to other cases.
1199       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1200         GenerateDataProcInstruction(kind,
1201                                     out_hi,
1202                                     first_hi,
1203                                     Operand(second_hi, ShiftType::LSL, shift_value),
1204                                     codegen);
1205         GenerateDataProcInstruction(kind,
1206                                     out_hi,
1207                                     out_hi,
1208                                     Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1209                                     codegen);
1210         GenerateDataProcInstruction(kind,
1211                                     out_lo,
1212                                     first_lo,
1213                                     Operand(second_lo, ShiftType::LSL, shift_value),
1214                                     codegen);
1215       } else {
1216         const vixl32::Register temp = temps.Acquire();
1217 
1218         __ Lsl(temp, second_hi, shift_value);
1219         __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1220         GenerateDataProc(kind,
1221                          out,
1222                          first,
1223                          Operand(second_lo, ShiftType::LSL, shift_value),
1224                          temp,
1225                          codegen);
1226       }
1227     } else {
1228       DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1229 
1230       // We are not doing this for HInstruction::kAdd because the output will require
1231       // Location::kOutputOverlap; not applicable to other cases.
1232       if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1233         GenerateDataProcInstruction(kind,
1234                                     out_lo,
1235                                     first_lo,
1236                                     Operand(second_lo, ShiftType::LSR, shift_value),
1237                                     codegen);
1238         GenerateDataProcInstruction(kind,
1239                                     out_lo,
1240                                     out_lo,
1241                                     Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1242                                     codegen);
1243         GenerateDataProcInstruction(kind,
1244                                     out_hi,
1245                                     first_hi,
1246                                     Operand(second_hi, shift, shift_value),
1247                                     codegen);
1248       } else {
1249         const vixl32::Register temp = temps.Acquire();
1250 
1251         __ Lsr(temp, second_lo, shift_value);
1252         __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1253         GenerateDataProc(kind,
1254                          out,
1255                          first,
1256                          temp,
1257                          Operand(second_hi, shift, shift_value),
1258                          codegen);
1259       }
1260     }
1261   }
1262 }
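
// Worked example for GenerateLongDataProc() above (illustrative only; the exact
// instructions emitted depend on the register allocator and on
// GenerateDataProcInstruction()). For kind == HInstruction::kOr with an LSR shift,
// the 64-bit operation out = first | (second >>u shift_value) splits into 32-bit
// halves as follows:
//   shift_value == 40 (>= 32):
//     out_lo = first_lo | (second_hi >> 8);    // 40 - 32 == 8
//     out_hi = first_hi | 0;                   // all surviving bits come from second_hi
//   shift_value == 8 (< 32):
//     out_lo = first_lo | (second_lo >> 8) | (second_hi << 24);
//     out_hi = first_hi | (second_hi >> 8);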
1263 
1264 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1265   const Location rhs_loc = instruction->GetLocations()->InAt(1);
1266   if (rhs_loc.IsConstant()) {
1267     // 0.0 is the only immediate that can be encoded directly in
1268     // a VCMP instruction.
1269     //
1270     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1271     // specify that in a floating-point comparison, positive zero
1272     // and negative zero are considered equal, so we can use the
1273     // literal 0.0 for both cases here.
1274     //
1275     // Note however that some methods (Float.equals, Float.compare,
1276     // Float.compareTo, Double.equals, Double.compare,
1277     // Double.compareTo, Math.max, Math.min, StrictMath.max,
1278     // StrictMath.min) consider 0.0 to be (strictly) greater than
1279     // -0.0. So if we ever translate calls to these methods into a
1280     // HCompare instruction, we must handle the -0.0 case with
1281     // care here.
1282     DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1283 
1284     const DataType::Type type = instruction->InputAt(0)->GetType();
1285 
1286     if (type == DataType::Type::kFloat32) {
1287       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1288     } else {
1289       DCHECK_EQ(type, DataType::Type::kFloat64);
1290       __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1291     }
1292   } else {
1293     __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1294   }
1295 }
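
// Illustrative sequences emitted via GenerateVcmp() (register numbers are arbitrary
// examples): a comparison of a float against the constant 0.0 becomes
//   vcmp.f32 s0, #0.0
// while a comparison of two doubles held in registers becomes
//   vcmp.f64 d0, d1
// The resulting FPSCR flags only become visible to the core after the
// "vmrs APSR_nzcv, FPSCR" that the caller (see GenerateTest() below) emits.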
1296 
1297 static int64_t AdjustConstantForCondition(int64_t value,
1298                                           IfCondition* condition,
1299                                           IfCondition* opposite) {
1300   if (value == 1) {
1301     if (*condition == kCondB) {
1302       value = 0;
1303       *condition = kCondEQ;
1304       *opposite = kCondNE;
1305     } else if (*condition == kCondAE) {
1306       value = 0;
1307       *condition = kCondNE;
1308       *opposite = kCondEQ;
1309     }
1310   } else if (value == -1) {
1311     if (*condition == kCondGT) {
1312       value = 0;
1313       *condition = kCondGE;
1314       *opposite = kCondLT;
1315     } else if (*condition == kCondLE) {
1316       value = 0;
1317       *condition = kCondLT;
1318       *opposite = kCondGE;
1319     }
1320   }
1321 
1322   return value;
1323 }
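
// Worked examples for AdjustConstantForCondition() (illustrative):
//   x <u 1   (kCondB,  value == 1)   ->  x == 0  (kCondEQ, value == 0)
//   x >=u 1  (kCondAE, value == 1)   ->  x != 0  (kCondNE, value == 0)
//   x > -1   (kCondGT, value == -1)  ->  x >= 0  (kCondGE, value == 0)
//   x <= -1  (kCondLE, value == -1)  ->  x < 0   (kCondLT, value == 0)
// Rewriting towards comparisons against 0 lets the callers below pick the cheaper
// zero-test code paths (ORRS or a plain CMP against 0).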
1324 
1325 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1326     HCondition* condition,
1327     bool invert,
1328     CodeGeneratorARMVIXL* codegen) {
1329   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1330 
1331   const LocationSummary* const locations = condition->GetLocations();
1332   IfCondition cond = condition->GetCondition();
1333   IfCondition opposite = condition->GetOppositeCondition();
1334 
1335   if (invert) {
1336     std::swap(cond, opposite);
1337   }
1338 
1339   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1340   const Location left = locations->InAt(0);
1341   const Location right = locations->InAt(1);
1342 
1343   DCHECK(right.IsConstant());
1344 
1345   const vixl32::Register left_high = HighRegisterFrom(left);
1346   const vixl32::Register left_low = LowRegisterFrom(left);
1347   int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1348   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1349 
1350   // Comparisons against 0 are common enough to deserve special attention.
1351   if (value == 0) {
1352     switch (cond) {
1353       case kCondNE:
1354       // x > 0 iff x != 0 when the comparison is unsigned.
1355       case kCondA:
1356         ret = std::make_pair(ne, eq);
1357         FALLTHROUGH_INTENDED;
1358       case kCondEQ:
1359       // x <= 0 iff x == 0 when the comparison is unsigned.
1360       case kCondBE:
1361         __ Orrs(temps.Acquire(), left_low, left_high);
1362         return ret;
1363       case kCondLT:
1364       case kCondGE:
1365         __ Cmp(left_high, 0);
1366         return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1367       // Trivially true or false.
1368       case kCondB:
1369         ret = std::make_pair(ne, eq);
1370         FALLTHROUGH_INTENDED;
1371       case kCondAE:
1372         __ Cmp(left_low, left_low);
1373         return ret;
1374       default:
1375         break;
1376     }
1377   }
1378 
1379   switch (cond) {
1380     case kCondEQ:
1381     case kCondNE:
1382     case kCondB:
1383     case kCondBE:
1384     case kCondA:
1385     case kCondAE: {
1386       const uint32_t value_low = Low32Bits(value);
1387       Operand operand_low(value_low);
1388 
1389       __ Cmp(left_high, High32Bits(value));
1390 
1391       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1392       // we must ensure that the operands corresponding to the least significant
1393       // halves of the inputs fit into a 16-bit CMP encoding.
1394       if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1395         operand_low = Operand(temps.Acquire());
1396         __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1397       }
1398 
1399       // We use the scope because of the IT block that follows.
1400       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1401                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1402                                CodeBufferCheckScope::kExactSize);
1403 
1404       __ it(eq);
1405       __ cmp(eq, left_low, operand_low);
1406       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1407       break;
1408     }
1409     case kCondLE:
1410     case kCondGT:
1411       // Trivially true or false.
1412       if (value == std::numeric_limits<int64_t>::max()) {
1413         __ Cmp(left_low, left_low);
1414         ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1415         break;
1416       }
1417 
1418       if (cond == kCondLE) {
1419         DCHECK_EQ(opposite, kCondGT);
1420         cond = kCondLT;
1421         opposite = kCondGE;
1422       } else {
1423         DCHECK_EQ(cond, kCondGT);
1424         DCHECK_EQ(opposite, kCondLE);
1425         cond = kCondGE;
1426         opposite = kCondLT;
1427       }
1428 
1429       value++;
1430       FALLTHROUGH_INTENDED;
1431     case kCondGE:
1432     case kCondLT: {
1433       __ Cmp(left_low, Low32Bits(value));
1434       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1435       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1436       break;
1437     }
1438     default:
1439       LOG(FATAL) << "Unreachable";
1440       UNREACHABLE();
1441   }
1442 
1443   return ret;
1444 }
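
// Illustrative sequence for the unsigned cases above (assuming left in r0/r1 as
// lo/hi, value == 0x0000000500000007 and cond == kCondB; actual registers depend
// on the register allocator):
//   cmp   r1, #5          @ compare the high words
//   it    eq
//   cmpeq r0, #7          @ compare the low words only if the high words are equal
// with the returned condition pair being (lo, hs). For kCondLT/kCondGE the code
// instead emits CMP on the low words followed by SBCS on the high words, so the
// signed flags reflect the full 64-bit comparison.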
1445 
1446 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1447     HCondition* condition,
1448     bool invert,
1449     CodeGeneratorARMVIXL* codegen) {
1450   DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1451 
1452   const LocationSummary* const locations = condition->GetLocations();
1453   IfCondition cond = condition->GetCondition();
1454   IfCondition opposite = condition->GetOppositeCondition();
1455 
1456   if (invert) {
1457     std::swap(cond, opposite);
1458   }
1459 
1460   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1461   Location left = locations->InAt(0);
1462   Location right = locations->InAt(1);
1463 
1464   DCHECK(right.IsRegisterPair());
1465 
1466   switch (cond) {
1467     case kCondEQ:
1468     case kCondNE:
1469     case kCondB:
1470     case kCondBE:
1471     case kCondA:
1472     case kCondAE: {
1473       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1474 
1475       // We use the scope because of the IT block that follows.
1476       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1477                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1478                                CodeBufferCheckScope::kExactSize);
1479 
1480       __ it(eq);
1481       __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1482       ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1483       break;
1484     }
1485     case kCondLE:
1486     case kCondGT:
1487       if (cond == kCondLE) {
1488         DCHECK_EQ(opposite, kCondGT);
1489         cond = kCondGE;
1490         opposite = kCondLT;
1491       } else {
1492         DCHECK_EQ(cond, kCondGT);
1493         DCHECK_EQ(opposite, kCondLE);
1494         cond = kCondLT;
1495         opposite = kCondGE;
1496       }
1497 
1498       std::swap(left, right);
1499       FALLTHROUGH_INTENDED;
1500     case kCondGE:
1501     case kCondLT: {
1502       UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1503 
1504       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1505       __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1506       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1507       break;
1508     }
1509     default:
1510       LOG(FATAL) << "Unreachable";
1511       UNREACHABLE();
1512   }
1513 
1514   return ret;
1515 }
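
// Illustrative sequence for a signed 64-bit "<" between two register pairs
// (assuming left in r0/r1, right in r2/r3 as lo/hi, and ip as the scratch register):
//   cmp  r0, r2           @ low words; establishes the borrow for the next instruction
//   sbcs ip, r1, r3       @ high words minus borrow; result discarded, flags kept
// The returned pair is (lt, ge): after the SBCS the N and V flags describe the
// whole 64-bit signed comparison.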
1516 
1517 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1518                                                                     bool invert,
1519                                                                     CodeGeneratorARMVIXL* codegen) {
1520   const DataType::Type type = condition->GetLeft()->GetType();
1521   IfCondition cond = condition->GetCondition();
1522   IfCondition opposite = condition->GetOppositeCondition();
1523   std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1524 
1525   if (invert) {
1526     std::swap(cond, opposite);
1527   }
1528 
1529   if (type == DataType::Type::kInt64) {
1530     ret = condition->GetLocations()->InAt(1).IsConstant()
1531         ? GenerateLongTestConstant(condition, invert, codegen)
1532         : GenerateLongTest(condition, invert, codegen);
1533   } else if (DataType::IsFloatingPointType(type)) {
1534     GenerateVcmp(condition, codegen);
1535     __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1536     ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1537                          ARMFPCondition(opposite, condition->IsGtBias()));
1538   } else {
1539     DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1540     __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1541     ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1542   }
1543 
1544   return ret;
1545 }
1546 
1547 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1548   const vixl32::Register out = OutputRegister(cond);
1549   const auto condition = GenerateTest(cond, false, codegen);
1550 
1551   __ Mov(LeaveFlags, out, 0);
1552 
1553   if (out.IsLow()) {
1554     // We use the scope because of the IT block that follows.
1555     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1556                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1557                              CodeBufferCheckScope::kExactSize);
1558 
1559     __ it(condition.first);
1560     __ mov(condition.first, out, 1);
1561   } else {
1562     vixl32::Label done_label;
1563     vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1564 
1565     __ B(condition.second, final_label, /* is_far_target= */ false);
1566     __ Mov(out, 1);
1567 
1568     if (done_label.IsReferenced()) {
1569       __ Bind(&done_label);
1570     }
1571   }
1572 }
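
// Illustrative materializations produced by GenerateConditionGeneric() after the
// flag-setting test (register names are examples only):
//   low output register (branchless):
//     mov.w r0, #0         @ LeaveFlags: must not clobber the flags just set
//     it    <cond>
//     mov<cond> r0, #1
//   high output register (no 16-bit MOV is available inside an IT block):
//     mov  r8, #0
//     b<opposite> done
//     mov  r8, #1
//   done: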
1573 
1574 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1575   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1576 
1577   const LocationSummary* const locations = cond->GetLocations();
1578   IfCondition condition = cond->GetCondition();
1579   const vixl32::Register out = OutputRegister(cond);
1580   const Location left = locations->InAt(0);
1581   const Location right = locations->InAt(1);
1582   vixl32::Register left_high = HighRegisterFrom(left);
1583   vixl32::Register left_low = LowRegisterFrom(left);
1584   vixl32::Register temp;
1585   UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1586 
1587   if (right.IsConstant()) {
1588     IfCondition opposite = cond->GetOppositeCondition();
1589     const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1590                                                      &condition,
1591                                                      &opposite);
1592     Operand right_high = High32Bits(value);
1593     Operand right_low = Low32Bits(value);
1594 
1595     // The output uses Location::kNoOutputOverlap.
1596     if (out.Is(left_high)) {
1597       std::swap(left_low, left_high);
1598       std::swap(right_low, right_high);
1599     }
1600 
1601     __ Sub(out, left_low, right_low);
1602     temp = temps.Acquire();
1603     __ Sub(temp, left_high, right_high);
1604   } else {
1605     DCHECK(right.IsRegisterPair());
1606     temp = temps.Acquire();
1607     __ Sub(temp, left_high, HighRegisterFrom(right));
1608     __ Sub(out, left_low, LowRegisterFrom(right));
1609   }
1610 
1611   // Need to check after calling AdjustConstantForCondition().
1612   DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1613 
1614   if (condition == kCondNE && out.IsLow()) {
1615     __ Orrs(out, out, temp);
1616 
1617     // We use the scope because of the IT block that follows.
1618     ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1619                              2 * vixl32::k16BitT32InstructionSizeInBytes,
1620                              CodeBufferCheckScope::kExactSize);
1621 
1622     __ it(ne);
1623     __ mov(ne, out, 1);
1624   } else {
1625     __ Orr(out, out, temp);
1626     codegen->GenerateConditionWithZero(condition, out, out, temp);
1627   }
1628 }
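
// Illustrative expansion of a 64-bit equality against a register pair (assuming
// left in r0/r1, right in r2/r3 as lo/hi, a non-low output register and ip as
// scratch):
//   sub ip,  r1, r3
//   sub out, r0, r2
//   orr out, out, ip        @ out == 0 iff both halves were equal
// after which GenerateConditionWithZero() turns the zero/non-zero value into 0/1.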
1629 
1630 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1631   DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1632 
1633   const LocationSummary* const locations = cond->GetLocations();
1634   IfCondition condition = cond->GetCondition();
1635   const vixl32::Register out = OutputRegister(cond);
1636   const Location left = locations->InAt(0);
1637   const Location right = locations->InAt(1);
1638 
1639   if (right.IsConstant()) {
1640     IfCondition opposite = cond->GetOppositeCondition();
1641 
1642     // Comparisons against 0 are common enough to deserve special attention.
1643     if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1644       switch (condition) {
1645         case kCondNE:
1646         case kCondA:
1647           if (out.IsLow()) {
1648             // We only care whether both input registers are 0 or not.
1649             __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1650 
1651             // We use the scope because of the IT block that follows.
1652             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1653                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1654                                      CodeBufferCheckScope::kExactSize);
1655 
1656             __ it(ne);
1657             __ mov(ne, out, 1);
1658             return;
1659           }
1660 
1661           FALLTHROUGH_INTENDED;
1662         case kCondEQ:
1663         case kCondBE:
1664           // We only care whether both input registers are 0 or not.
1665           __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1666           codegen->GenerateConditionWithZero(condition, out, out);
1667           return;
1668         case kCondLT:
1669         case kCondGE:
1670           // We only care about the sign bit.
1671           FALLTHROUGH_INTENDED;
1672         case kCondAE:
1673         case kCondB:
1674           codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1675           return;
1676         case kCondLE:
1677         case kCondGT:
1678         default:
1679           break;
1680       }
1681     }
1682   }
1683 
1684   // If `out` is a low register, then the GenerateConditionGeneric()
1685   // function generates a shorter code sequence that is still branchless.
1686   if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1687     GenerateEqualLong(cond, codegen);
1688     return;
1689   }
1690 
1691   GenerateConditionGeneric(cond, codegen);
1692 }
1693 
1694 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1695                                                     CodeGeneratorARMVIXL* codegen) {
1696   const DataType::Type type = cond->GetLeft()->GetType();
1697 
1698   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1699 
1700   if (type == DataType::Type::kInt64) {
1701     GenerateConditionLong(cond, codegen);
1702     return;
1703   }
1704 
1705   IfCondition condition = cond->GetCondition();
1706   vixl32::Register in = InputRegisterAt(cond, 0);
1707   const vixl32::Register out = OutputRegister(cond);
1708   const Location right = cond->GetLocations()->InAt(1);
1709   int64_t value;
1710 
1711   if (right.IsConstant()) {
1712     IfCondition opposite = cond->GetOppositeCondition();
1713 
1714     value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1715 
1716     // Comparisons against 0 are common enough to deserve special attention.
1717     if (value == 0) {
1718       switch (condition) {
1719         case kCondNE:
1720         case kCondA:
1721           if (out.IsLow() && out.Is(in)) {
1722             __ Cmp(out, 0);
1723 
1724             // We use the scope because of the IT block that follows.
1725             ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1726                                      2 * vixl32::k16BitT32InstructionSizeInBytes,
1727                                      CodeBufferCheckScope::kExactSize);
1728 
1729             __ it(ne);
1730             __ mov(ne, out, 1);
1731             return;
1732           }
1733 
1734           FALLTHROUGH_INTENDED;
1735         case kCondEQ:
1736         case kCondBE:
1737         case kCondLT:
1738         case kCondGE:
1739         case kCondAE:
1740         case kCondB:
1741           codegen->GenerateConditionWithZero(condition, out, in);
1742           return;
1743         case kCondLE:
1744         case kCondGT:
1745         default:
1746           break;
1747       }
1748     }
1749   }
1750 
1751   if (condition == kCondEQ || condition == kCondNE) {
1752     Operand operand(0);
1753 
1754     if (right.IsConstant()) {
1755       operand = Operand::From(value);
1756     } else if (out.Is(RegisterFrom(right))) {
1757       // Avoid 32-bit instructions if possible.
1758       operand = InputOperandAt(cond, 0);
1759       in = RegisterFrom(right);
1760     } else {
1761       operand = InputOperandAt(cond, 1);
1762     }
1763 
1764     if (condition == kCondNE && out.IsLow()) {
1765       __ Subs(out, in, operand);
1766 
1767       // We use the scope because of the IT block that follows.
1768       ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1769                                2 * vixl32::k16BitT32InstructionSizeInBytes,
1770                                CodeBufferCheckScope::kExactSize);
1771 
1772       __ it(ne);
1773       __ mov(ne, out, 1);
1774     } else {
1775       __ Sub(out, in, operand);
1776       codegen->GenerateConditionWithZero(condition, out, out);
1777     }
1778 
1779     return;
1780   }
1781 
1782   GenerateConditionGeneric(cond, codegen);
1783 }
1784 
1785 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1786   const DataType::Type type = constant->GetType();
1787   bool ret = false;
1788 
1789   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1790 
1791   if (type == DataType::Type::kInt64) {
1792     const uint64_t value = Uint64ConstantFrom(constant);
1793 
1794     ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1795   } else {
1796     ret = IsUint<8>(Int32ConstantFrom(constant));
1797   }
1798 
1799   return ret;
1800 }
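
// Illustrative examples: 0x7f and the 64-bit constant 0x0000002a00000001 (both
// halves fit in 8 bits) are accepted, while 0x100 and 0x1ffffffff are rejected.
// The 8-bit limit is what makes a 16-bit MOV immediate encoding possible in the
// IT-based conditional moves checked by CanGenerateConditionalMove() below.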
1801 
1802 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1803   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1804 
1805   if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1806     return Location::ConstantLocation(constant->AsConstant());
1807   }
1808 
1809   return Location::RequiresRegister();
1810 }
1811 
1812 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1813   // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1814   // we check that we are not dealing with floating-point output (there is no
1815   // 16-bit VMOV encoding).
1816   if (!out.IsRegister() && !out.IsRegisterPair()) {
1817     return false;
1818   }
1819 
1820   // For constants, we also check that the output is in one or two low registers,
1821   // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1822   // MOV encoding can be used.
1823   if (src.IsConstant()) {
1824     if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1825       return false;
1826     }
1827 
1828     if (out.IsRegister()) {
1829       if (!RegisterFrom(out).IsLow()) {
1830         return false;
1831       }
1832     } else {
1833       DCHECK(out.IsRegisterPair());
1834 
1835       if (!HighRegisterFrom(out).IsLow()) {
1836         return false;
1837       }
1838     }
1839   }
1840 
1841   return true;
1842 }
1843 
1844 #undef __
1845 
1846 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1847                                                    vixl32::Label* final_label) {
1848   DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1849   DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
1850 
1851   const HBasicBlock* const block = instruction->GetBlock();
1852   const HLoopInformation* const info = block->GetLoopInformation();
1853   HInstruction* const next = instruction->GetNext();
1854 
1855   // Avoid a branch to a branch.
1856   if (next->IsGoto() && (info == nullptr ||
1857                          !info->IsBackEdge(*block) ||
1858                          !info->HasSuspendCheck())) {
1859     final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1860   }
1861 
1862   return final_label;
1863 }
1864 
1865 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1866                                            const CompilerOptions& compiler_options,
1867                                            OptimizingCompilerStats* stats)
1868     : CodeGenerator(graph,
1869                     kNumberOfCoreRegisters,
1870                     kNumberOfSRegisters,
1871                     kNumberOfRegisterPairs,
1872                     kCoreCalleeSaves.GetList(),
1873                     ComputeSRegisterListMask(kFpuCalleeSaves),
1874                     compiler_options,
1875                     stats),
1876       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1877       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1878       location_builder_(graph, this),
1879       instruction_visitor_(graph, this),
1880       move_resolver_(graph->GetAllocator(), this),
1881       assembler_(graph->GetAllocator()),
1882       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1883       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1884       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1885       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1886       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1887       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1888       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1889       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1890       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1891       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1892       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1893       uint32_literals_(std::less<uint32_t>(),
1894                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1895       jit_string_patches_(StringReferenceValueComparator(),
1896                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1897       jit_class_patches_(TypeReferenceValueComparator(),
1898                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1899       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1900                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1901   // Always save the LR register to mimic Quick.
1902   AddAllocatedRegister(Location::RegisterLocation(LR));
1903   // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
1904   // S0-S31, which alias to D0-D15.
1905   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1906   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1907 }
1908 
1909 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1910   uint32_t num_entries = switch_instr_->GetNumEntries();
1911   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1912 
1913   // We are about to use the assembler to place literals directly. Make sure we have enough
1914   // underlying code buffer and that we have generated a jump table of the right size, using
1915   // codegen->GetVIXLAssembler()->GetBuffer().Align();
1916   ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1917                          num_entries * sizeof(int32_t),
1918                          CodeBufferCheckScope::kMaximumSize);
1919   // TODO(VIXL): Check that using lower case bind is fine here.
1920   codegen->GetVIXLAssembler()->bind(&table_start_);
1921   for (uint32_t i = 0; i < num_entries; i++) {
1922     codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1923   }
1924 }
1925 
1926 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1927   uint32_t num_entries = switch_instr_->GetNumEntries();
1928   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1929 
1930   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1931   for (uint32_t i = 0; i < num_entries; i++) {
1932     vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1933     DCHECK(target_label->IsBound());
1934     int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
1935     // When doing a BX to an address we need to have the lower bit set to 1 in T32 (Thumb state).
1936     if (codegen->GetVIXLAssembler()->IsUsingT32()) {
1937       jump_offset++;
1938     }
1939     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
1940     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
1941 
1942     bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
1943   }
1944 }
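
// Worked example for FixTable() (illustrative offsets): if the table starts at
// buffer offset 0x100 and a target block's label is bound at 0x154, the entry
// stored for that successor is 0x154 - 0x100 = 0x54, plus 1 in T32 (0x55), so that
// the BX used by the HPackedSwitch dispatch (which branches to the table base
// address plus the entry) stays in Thumb state.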
1945 
1946 void CodeGeneratorARMVIXL::FixJumpTables() {
1947   for (auto&& jump_table : jump_tables_) {
1948     jump_table->FixTable(this);
1949   }
1950 }
1951 
1952 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
1953 
1954 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
1955   FixJumpTables();
1956 
1957   // Emit JIT baker read barrier slow paths.
1958   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
1959   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
1960     uint32_t encoded_data = entry.first;
1961     vixl::aarch32::Label* slow_path_entry = &entry.second.label;
1962     __ Bind(slow_path_entry);
1963     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
1964   }
1965 
1966   GetAssembler()->FinalizeCode();
1967   CodeGenerator::Finalize(allocator);
1968 
1969   // Verify Baker read barrier linker patches.
1970   if (kIsDebugBuild) {
1971     ArrayRef<const uint8_t> code = allocator->GetMemory();
1972     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
1973       DCHECK(info.label.IsBound());
1974       uint32_t literal_offset = info.label.GetLocation();
1975       DCHECK_ALIGNED(literal_offset, 2u);
1976 
1977       auto GetInsn16 = [&code](uint32_t offset) {
1978         DCHECK_ALIGNED(offset, 2u);
1979         return (static_cast<uint32_t>(code[offset + 0]) << 0) +
1980                (static_cast<uint32_t>(code[offset + 1]) << 8);
1981       };
1982       auto GetInsn32 = [=](uint32_t offset) {
1983         return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
1984       };
1985 
1986       uint32_t encoded_data = info.custom_data;
1987       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1988       // Check that the next instruction matches the expected LDR.
1989       switch (kind) {
1990         case BakerReadBarrierKind::kField: {
1991           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
1992           if (width == BakerReadBarrierWidth::kWide) {
1993             DCHECK_GE(code.size() - literal_offset, 8u);
1994             uint32_t next_insn = GetInsn32(literal_offset + 4u);
1995             // LDR (immediate), encoding T3, with correct base_reg.
1996             CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
1997             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1998             CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
1999           } else {
2000             DCHECK_GE(code.size() - literal_offset, 6u);
2001             uint32_t next_insn = GetInsn16(literal_offset + 4u);
2002             // LDR (immediate), encoding T1, with correct base_reg.
2003             CheckValidReg(next_insn & 0x7u);  // Check destination register.
2004             const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2005             CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
2006           }
2007           break;
2008         }
2009         case BakerReadBarrierKind::kArray: {
2010           DCHECK_GE(code.size() - literal_offset, 8u);
2011           uint32_t next_insn = GetInsn32(literal_offset + 4u);
2012           // LDR (register), encoding T2, with correct base_reg and an LSL #2 index shift.
2013           CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
2014           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2015           CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
2016           CheckValidReg(next_insn & 0xf);  // Check index register
2017           break;
2018         }
2019         case BakerReadBarrierKind::kGcRoot: {
2020           BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
2021           if (width == BakerReadBarrierWidth::kWide) {
2022             DCHECK_GE(literal_offset, 4u);
2023             uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2024             // LDR (immediate), encoding T3, with correct root_reg.
2025             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2026             CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2027           } else {
2028             DCHECK_GE(literal_offset, 2u);
2029             uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2030             const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2031             // Usually LDR (immediate), encoding T1, with correct root_reg but we may have
2032             // a `MOV marked, old_value` for intrinsic CAS where `marked` is a low register.
2033             if ((prev_insn & 0xff87u) != (0x4600 | root_reg)) {
2034               CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2035             }
2036           }
2037           break;
2038         }
2039         case BakerReadBarrierKind::kIntrinsicCas: {
2040           DCHECK_GE(literal_offset, 4u);
2041           uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2042           // MOV (register), encoding T3, with correct root_reg.
2043           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2044           DCHECK_GE(root_reg, 8u);  // Used only for high registers.
2045           CHECK_EQ(prev_insn & 0xfffffff0u, 0xea4f0000u | (root_reg << 8));
2046           break;
2047         }
2048         default:
2049           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2050           UNREACHABLE();
2051       }
2052     }
2053   }
2054 }
2055 
2056 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2057   // Stack register, LR and PC are always reserved.
2058   blocked_core_registers_[SP] = true;
2059   blocked_core_registers_[LR] = true;
2060   blocked_core_registers_[PC] = true;
2061 
2062   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2063     // Reserve marking register.
2064     blocked_core_registers_[MR] = true;
2065   }
2066 
2067   // Reserve thread register.
2068   blocked_core_registers_[TR] = true;
2069 
2070   // Reserve temp register.
2071   blocked_core_registers_[IP] = true;
2072 
2073   if (GetGraph()->IsDebuggable()) {
2074     // Stubs do not save callee-save floating point registers. If the graph
2075     // is debuggable, we need to deal with these registers differently. For
2076     // now, just block them.
2077     for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2078          i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2079          ++i) {
2080       blocked_fpu_registers_[i] = true;
2081     }
2082   }
2083 }
2084 
2085 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2086                                                                  CodeGeneratorARMVIXL* codegen)
2087       : InstructionCodeGenerator(graph, codegen),
2088         assembler_(codegen->GetAssembler()),
2089         codegen_(codegen) {}
2090 
2091 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2092   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2093   DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2094       << "At least the return address register must be saved";
2095   // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2096   DCHECK(GetVIXLAssembler()->IsUsingT32());
2097   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2098   // We use vpush and vpop for saving and restoring floating point registers, which take
2099   // an SRegister and the number of registers to save/restore after that SRegister. We
2100   // therefore update the `fpu_spill_mask_` to also contain the registers in that range
2101   // that were not allocated.
2102   if (fpu_spill_mask_ != 0) {
2103     uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2104     uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2105     for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2106       fpu_spill_mask_ |= (1 << i);
2107     }
2108   }
2109 }
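
// Worked example for the FP spill mask fix-up above (illustrative): if the register
// allocator spills only s16 and s19, fpu_spill_mask_ starts out with just bits 16
// and 19 set; the loop also sets bits 17 and 18 so that a single contiguous
// "vpush {s16-s19}" / "vpop {s16-s19}" pair can be used in the frame entry/exit.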
2110 
2111 void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
2112   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2113     UseScratchRegisterScope temps(GetVIXLAssembler());
2114     vixl32::Register temp = temps.Acquire();
2115     static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2116     if (!is_frame_entry) {
2117       __ Push(vixl32::Register(kMethodRegister));
2118       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2119       GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2120     }
2121     // Load with zero extend to clear the high bits for integer overflow check.
2122     __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2123     __ Add(temp, temp, 1);
2124     // Subtract one if the counter would overflow.
2125     __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
2126     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2127     if (!is_frame_entry) {
2128       __ Pop(vixl32::Register(kMethodRegister));
2129       GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2130     }
2131   }
2132 
2133   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2134     ScopedProfilingInfoUse spiu(
2135         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
2136     ProfilingInfo* info = spiu.GetProfilingInfo();
2137     if (info != nullptr) {
2138       uint32_t address = reinterpret_cast32<uint32_t>(info);
2139       vixl::aarch32::Label done;
2140       UseScratchRegisterScope temps(GetVIXLAssembler());
2141       temps.Exclude(ip);
2142       if (!is_frame_entry) {
2143         __ Push(r4);  // Will be used as temporary. For frame entry, r4 is always available.
2144         GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2145       }
2146       __ Mov(r4, address);
2147       __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2148       __ Add(ip, ip, 1);
2149       instruction_visitor_.GenerateAndConst(ip, ip, interpreter::kTieredHotnessMask);
2150       __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2151       if (!is_frame_entry) {
2152         __ Pop(r4);
2153         GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2154       }
2155       __ Lsls(ip, ip, 16);
2156       __ B(ne, &done);
2157       uint32_t entry_point_offset =
2158           GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
2159       if (HasEmptyFrame()) {
2160         CHECK(is_frame_entry);
2161         // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for
2162         // alignment.
2163         uint32_t core_spill_mask =
2164             (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
2165         __ Push(RegisterList(core_spill_mask));
2166         GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask));
2167         __ Ldr(lr, MemOperand(tr, entry_point_offset));
2168         __ Blx(lr);
2169         __ Pop(RegisterList(core_spill_mask));
2170         GetAssembler()->cfi().AdjustCFAOffset(
2171             -static_cast<int>(kArmWordSize) * POPCOUNT(core_spill_mask));
2172       } else {
2173         if (!RequiresCurrentMethod()) {
2174           CHECK(is_frame_entry);
2175           GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2176         }
2177         __ Ldr(lr, MemOperand(tr, entry_point_offset));
2178         __ Blx(lr);
2179       }
2180       __ Bind(&done);
2181     }
2182   }
2183 }
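
// Worked example of the saturating 16-bit counter update above (illustrative):
// for a current counter of 0xFFFE the sequence computes 0xFFFF and subtracts
// 0xFFFF >> 16 == 0, storing 0xFFFF; for 0xFFFF it computes 0x10000 and subtracts
// 0x10000 >> 16 == 1, storing 0xFFFF again. The counter therefore saturates at
// ArtMethod::MaxCounter() instead of wrapping back to 0.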
2184 
2185 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2186   bool skip_overflow_check =
2187       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2188   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2189   __ Bind(&frame_entry_label_);
2190 
2191   if (HasEmptyFrame()) {
2192     // Ensure that the CFI opcode list is not empty.
2193     GetAssembler()->cfi().Nop();
2194     MaybeIncrementHotness(/* is_frame_entry= */ true);
2195     return;
2196   }
2197 
2198   if (!skip_overflow_check) {
2199     // Using r4 instead of IP saves 2 bytes.
2200     UseScratchRegisterScope temps(GetVIXLAssembler());
2201     vixl32::Register temp;
2202     // TODO: Remove this check when R4 is made a callee-save register
2203     // in ART compiled code (b/72801708). Currently we need to make
2204     // sure r4 is not blocked, e.g. in special purpose
2205     // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2206     // here.
2207     if (!blocked_core_registers_[R4]) {
2208       for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2209         DCHECK(!reg.Is(r4));
2210       }
2211       DCHECK(!kCoreCalleeSaves.Includes(r4));
2212       temp = r4;
2213     } else {
2214       temp = temps.Acquire();
2215     }
2216     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2217     // The load must immediately precede RecordPcInfo.
2218     ExactAssemblyScope aas(GetVIXLAssembler(),
2219                            vixl32::kMaxInstructionSizeInBytes,
2220                            CodeBufferCheckScope::kMaximumSize);
2221     __ ldr(temp, MemOperand(temp));
2222     RecordPcInfo(nullptr, 0);
2223   }
2224 
2225   uint32_t frame_size = GetFrameSize();
2226   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2227   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2228   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2229       core_spills_offset <= 3u * kArmWordSize) {
2230     // Do a single PUSH for core registers including the method and up to two
2231     // filler registers. Then store the single FP spill if any.
2232     // (The worst case is when the method is not required and we actually
2233     // store 3 extra registers but they are stored in the same properly
2234     // aligned 16-byte chunk where we're already writing anyway.)
2235     DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2236     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2237     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2238     __ Push(RegisterList(core_spill_mask_ | extra_regs));
2239     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2240     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2241                                            core_spills_offset,
2242                                            core_spill_mask_,
2243                                            kArmWordSize);
2244     if (fpu_spill_mask_ != 0u) {
2245       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2246       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2247       GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2248       GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2249     }
2250   } else {
2251     __ Push(RegisterList(core_spill_mask_));
2252     GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2253     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2254                                            /*offset=*/ 0,
2255                                            core_spill_mask_,
2256                                            kArmWordSize);
2257     if (fpu_spill_mask_ != 0) {
2258       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2259 
2260       // Check that list is contiguous.
2261       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2262 
2263       __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2264       GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2265       GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2266                                              /*offset=*/ 0,
2267                                              fpu_spill_mask_,
2268                                              kArmWordSize);
2269     }
2270 
2271     // Adjust SP and save the current method if we need it. Note that we do
2272     // not save the method in HCurrentMethod, as the instruction might have
2273     // been removed in the SSA graph.
2274     if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2275       DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2276       __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2277       GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2278     } else {
2279       IncreaseFrame(fp_spills_offset);
2280       if (RequiresCurrentMethod()) {
2281         GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2282       }
2283     }
2284   }
2285 
2286   if (GetGraph()->HasShouldDeoptimizeFlag()) {
2287     UseScratchRegisterScope temps(GetVIXLAssembler());
2288     vixl32::Register temp = temps.Acquire();
2289     // Initialize should_deoptimize flag to 0.
2290     __ Mov(temp, 0);
2291     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2292   }
2293 
2294   MaybeIncrementHotness(/* is_frame_entry= */ true);
2295   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2296 }
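
// Worked example of the single-PUSH frame setup above (hypothetical numbers): with
// core_spill_mask_ == {r5, lr}, no FP spills and a 16-byte frame, core_spills_offset
// is 16 - 8 == 8, so extra_regs becomes r0|r1 and a single "push {r0, r1, r5, lr}"
// both spills the callee-saves and allocates the whole frame, with r0 (the
// ArtMethod*) landing at [sp, #0].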
2297 
2298 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2299   if (HasEmptyFrame()) {
2300     __ Bx(lr);
2301     return;
2302   }
2303 
2304   // Pop LR into PC to return.
2305   DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2306   uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
2307 
2308   uint32_t frame_size = GetFrameSize();
2309   uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2310   uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2311   if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2312       // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2313       core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2314     // Load the FP spill if any and then do a single POP including the method
2315     // and up to two filler registers. If we have no FP spills, this also has
2316     // the advantage that we do not need to emit CFI directives.
2317     if (fpu_spill_mask_ != 0u) {
2318       DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2319       vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2320       GetAssembler()->cfi().RememberState();
2321       GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2322       GetAssembler()->cfi().Restore(DWARFReg(sreg));
2323     }
2324     // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2325     // never hold the return value.
2326     uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2327     DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2328     DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2329     __ Pop(RegisterList(pop_mask | extra_regs));
2330     if (fpu_spill_mask_ != 0u) {
2331       GetAssembler()->cfi().RestoreState();
2332     }
2333   } else {
2334     GetAssembler()->cfi().RememberState();
2335     DecreaseFrame(fp_spills_offset);
2336     if (fpu_spill_mask_ != 0) {
2337       uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2338 
2339       // Check that list is contiguous.
2340       DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2341 
2342       __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2343       GetAssembler()->cfi().AdjustCFAOffset(
2344           -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2345       GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2346     }
2347     __ Pop(RegisterList(pop_mask));
2348     GetAssembler()->cfi().RestoreState();
2349     GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2350   }
2351 }
2352 
2353 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2354   __ Bind(GetLabelOf(block));
2355 }
2356 
2357 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2358   switch (type) {
2359     case DataType::Type::kReference:
2360     case DataType::Type::kBool:
2361     case DataType::Type::kUint8:
2362     case DataType::Type::kInt8:
2363     case DataType::Type::kUint16:
2364     case DataType::Type::kInt16:
2365     case DataType::Type::kInt32: {
2366       uint32_t index = gp_index_++;
2367       uint32_t stack_index = stack_index_++;
2368       if (index < calling_convention.GetNumberOfRegisters()) {
2369         return LocationFrom(calling_convention.GetRegisterAt(index));
2370       } else {
2371         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2372       }
2373     }
2374 
2375     case DataType::Type::kInt64: {
2376       uint32_t index = gp_index_;
2377       uint32_t stack_index = stack_index_;
2378       gp_index_ += 2;
2379       stack_index_ += 2;
2380       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2381         if (calling_convention.GetRegisterAt(index).Is(r1)) {
2382           // Skip R1, and use R2_R3 instead.
2383           gp_index_++;
2384           index++;
2385         }
2386       }
2387       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2388         DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2389                   calling_convention.GetRegisterAt(index + 1).GetCode());
2390 
2391         return LocationFrom(calling_convention.GetRegisterAt(index),
2392                             calling_convention.GetRegisterAt(index + 1));
2393       } else {
2394         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2395       }
2396     }
2397 
2398     case DataType::Type::kFloat32: {
2399       uint32_t stack_index = stack_index_++;
2400       if (float_index_ % 2 == 0) {
2401         float_index_ = std::max(double_index_, float_index_);
2402       }
2403       if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2404         return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2405       } else {
2406         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2407       }
2408     }
2409 
2410     case DataType::Type::kFloat64: {
2411       double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2412       uint32_t stack_index = stack_index_;
2413       stack_index_ += 2;
2414       if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2415         uint32_t index = double_index_;
2416         double_index_ += 2;
2417         Location result = LocationFrom(
2418           calling_convention.GetFpuRegisterAt(index),
2419           calling_convention.GetFpuRegisterAt(index + 1));
2420         DCHECK(ExpectedPairLayout(result));
2421         return result;
2422       } else {
2423         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2424       }
2425     }
2426 
2427     case DataType::Type::kUint32:
2428     case DataType::Type::kUint64:
2429     case DataType::Type::kVoid:
2430       LOG(FATAL) << "Unexpected parameter type " << type;
2431       UNREACHABLE();
2432   }
2433   return Location::NoLocation();
2434 }
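
// Worked examples for the managed calling convention above (illustrative):
//   static void m(long a, int b): a -> r2/r3 (r1 is skipped so the pair starts at
//   an even register), b -> a stack slot (no core argument registers are left);
//   the ArtMethod* itself stays in r0.
//   static void m(float a, double b, float c): a -> s0, b -> s2/s3, c -> s1
//   (the second float back-fills the slot skipped when aligning the double).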
2435 
2436 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2437   switch (type) {
2438     case DataType::Type::kReference:
2439     case DataType::Type::kBool:
2440     case DataType::Type::kUint8:
2441     case DataType::Type::kInt8:
2442     case DataType::Type::kUint16:
2443     case DataType::Type::kInt16:
2444     case DataType::Type::kUint32:
2445     case DataType::Type::kInt32: {
2446       return LocationFrom(r0);
2447     }
2448 
2449     case DataType::Type::kFloat32: {
2450       return LocationFrom(s0);
2451     }
2452 
2453     case DataType::Type::kUint64:
2454     case DataType::Type::kInt64: {
2455       return LocationFrom(r0, r1);
2456     }
2457 
2458     case DataType::Type::kFloat64: {
2459       return LocationFrom(s0, s1);
2460     }
2461 
2462     case DataType::Type::kVoid:
2463       return Location::NoLocation();
2464   }
2465 
2466   UNREACHABLE();
2467 }
2468 
GetMethodLocation() const2469 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2470   return LocationFrom(kMethodRegister);
2471 }
2472 
GetNextLocation(DataType::Type type)2473 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2474   DCHECK_NE(type, DataType::Type::kReference);
2475 
2476   // Native ABI uses the same registers as managed, except that the method register r0
2477   // is a normal argument.
2478   Location location = Location::NoLocation();
2479   if (DataType::Is64BitType(type)) {
2480     gpr_index_ = RoundUp(gpr_index_, 2u);
2481     stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2482     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2483       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2484                               kParameterCoreRegistersVIXL[gpr_index_]);
2485       gpr_index_ += 2u;
2486     }
2487   } else {
2488     if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2489       location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2490       ++gpr_index_;
2491     }
2492   }
2493   if (location.IsInvalid()) {
2494     if (DataType::Is64BitType(type)) {
2495       location = Location::DoubleStackSlot(stack_offset_);
2496       stack_offset_ += 2 * kFramePointerSize;
2497     } else {
2498       location = Location::StackSlot(stack_offset_);
2499       stack_offset_ += kFramePointerSize;
2500     }
2501 
2502     if (for_register_allocation_) {
2503       location = Location::Any();
2504     }
2505   }
2506   return location;
2507 }
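// Rough example of the @CriticalNative assignment above for a hypothetical native
// signature (int, long, int): the first int goes to r0 (the method register is a normal
// argument here), the long is aligned to an even register and takes r2/r3, and the
// trailing int no longer fits in r0-r3, so it gets the stack slot at offset 0.
// When preparing locations for the register allocator, such stack arguments are
// relaxed to Location::Any() as above.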
2508 
GetReturnLocation(DataType::Type type) const2509 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2510     const {
2511   // We perform conversion to the managed ABI return register after the call if needed.
2512   InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2513   return dex_calling_convention.GetReturnLocation(type);
2514 }
2515 
GetMethodLocation() const2516 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2517   // Pass the method in the hidden argument R4.
2518   return Location::RegisterLocation(R4);
2519 }
2520 
Move32(Location destination,Location source)2521 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2522   if (source.Equals(destination)) {
2523     return;
2524   }
2525   if (destination.IsRegister()) {
2526     if (source.IsRegister()) {
2527       __ Mov(RegisterFrom(destination), RegisterFrom(source));
2528     } else if (source.IsFpuRegister()) {
2529       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2530     } else {
2531       GetAssembler()->LoadFromOffset(kLoadWord,
2532                                      RegisterFrom(destination),
2533                                      sp,
2534                                      source.GetStackIndex());
2535     }
2536   } else if (destination.IsFpuRegister()) {
2537     if (source.IsRegister()) {
2538       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2539     } else if (source.IsFpuRegister()) {
2540       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2541     } else {
2542       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2543     }
2544   } else {
2545     DCHECK(destination.IsStackSlot()) << destination;
2546     if (source.IsRegister()) {
2547       GetAssembler()->StoreToOffset(kStoreWord,
2548                                     RegisterFrom(source),
2549                                     sp,
2550                                     destination.GetStackIndex());
2551     } else if (source.IsFpuRegister()) {
2552       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2553     } else {
2554       DCHECK(source.IsStackSlot()) << source;
2555       UseScratchRegisterScope temps(GetVIXLAssembler());
2556       vixl32::Register temp = temps.Acquire();
2557       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2558       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2559     }
2560   }
2561 }
2562 
MoveConstant(Location location,int32_t value)2563 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2564   DCHECK(location.IsRegister());
2565   __ Mov(RegisterFrom(location), value);
2566 }
2567 
MoveLocation(Location dst,Location src,DataType::Type dst_type)2568 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2569   // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2570   // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2571   HParallelMove move(GetGraph()->GetAllocator());
2572   move.AddMove(src, dst, dst_type, nullptr);
2573   GetMoveResolver()->EmitNativeCode(&move);
2574 }
2575 
AddLocationAsTemp(Location location,LocationSummary * locations)2576 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2577   if (location.IsRegister()) {
2578     locations->AddTemp(location);
2579   } else if (location.IsRegisterPair()) {
2580     locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2581     locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2582   } else {
2583     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2584   }
2585 }
2586 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)2587 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2588                                          HInstruction* instruction,
2589                                          uint32_t dex_pc,
2590                                          SlowPathCode* slow_path) {
2591   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2592 
2593   ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2594   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2595   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2596   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2597   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2598     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2599     // Ensure the pc position is recorded immediately after the `blx` instruction.
2600     // blx in T32 has only a 16-bit encoding, hence the stricter size check for the scope.
2601     ExactAssemblyScope aas(GetVIXLAssembler(),
2602                            vixl32::k16BitT32InstructionSizeInBytes,
2603                            CodeBufferCheckScope::kExactSize);
2604     __ blx(lr);
2605     if (EntrypointRequiresStackMap(entrypoint)) {
2606       RecordPcInfo(instruction, dex_pc, slow_path);
2607     }
2608   } else {
2609     // Ensure the pc position is recorded immediately after the `bl` instruction.
2610     ExactAssemblyScope aas(GetVIXLAssembler(),
2611                            vixl32::k32BitT32InstructionSizeInBytes,
2612                            CodeBufferCheckScope::kExactSize);
2613     EmitEntrypointThunkCall(entrypoint_offset);
2614     if (EntrypointRequiresStackMap(entrypoint)) {
2615       RecordPcInfo(instruction, dex_pc, slow_path);
2616     }
2617   }
2618 }
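// Roughly, the two paths above emit either
//   ldr lr, [tr, #entrypoint_offset]   ; JIT, or no slow path
//   blx lr
// or, for AOT slow-path calls, a single
//   bl <entrypoint thunk>
// where the thunk (shared across the oat file) performs the same load-and-branch.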
2619 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)2620 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2621                                                                HInstruction* instruction,
2622                                                                SlowPathCode* slow_path) {
2623   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2624   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2625   __ Blx(lr);
2626 }
2627 
HandleGoto(HInstruction * got,HBasicBlock * successor)2628 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2629   if (successor->IsExitBlock()) {
2630     DCHECK(got->GetPrevious()->AlwaysThrows());
2631     return;  // no code needed
2632   }
2633 
2634   HBasicBlock* block = got->GetBlock();
2635   HInstruction* previous = got->GetPrevious();
2636   HLoopInformation* info = block->GetLoopInformation();
2637 
2638   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2639     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
2640     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2641     return;
2642   }
2643   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2644     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2645     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2646   }
2647   if (!codegen_->GoesToNextBlock(block, successor)) {
2648     __ B(codegen_->GetLabelOf(successor));
2649   }
2650 }
2651 
VisitGoto(HGoto * got)2652 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2653   got->SetLocations(nullptr);
2654 }
2655 
VisitGoto(HGoto * got)2656 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2657   HandleGoto(got, got->GetSuccessor());
2658 }
2659 
VisitTryBoundary(HTryBoundary * try_boundary)2660 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2661   try_boundary->SetLocations(nullptr);
2662 }
2663 
VisitTryBoundary(HTryBoundary * try_boundary)2664 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2665   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2666   if (!successor->IsExitBlock()) {
2667     HandleGoto(try_boundary, successor);
2668   }
2669 }
2670 
VisitExit(HExit * exit)2671 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2672   exit->SetLocations(nullptr);
2673 }
2674 
VisitExit(HExit * exit ATTRIBUTE_UNUSED)2675 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
2676 }
2677 
GenerateCompareTestAndBranch(HCondition * condition,vixl32::Label * true_target,vixl32::Label * false_target,bool is_far_target)2678 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2679                                                                    vixl32::Label* true_target,
2680                                                                    vixl32::Label* false_target,
2681                                                                    bool is_far_target) {
2682   if (true_target == false_target) {
2683     DCHECK(true_target != nullptr);
2684     __ B(true_target);
2685     return;
2686   }
2687 
2688   vixl32::Label* non_fallthrough_target;
2689   bool invert;
2690   bool emit_both_branches;
2691 
2692   if (true_target == nullptr) {
2693     // The true target is fallthrough.
2694     DCHECK(false_target != nullptr);
2695     non_fallthrough_target = false_target;
2696     invert = true;
2697     emit_both_branches = false;
2698   } else {
2699     non_fallthrough_target = true_target;
2700     invert = false;
2701     // Either the false target is fallthrough, or there is no fallthrough
2702     // and both branches must be emitted.
2703     emit_both_branches = (false_target != nullptr);
2704   }
2705 
2706   const auto cond = GenerateTest(condition, invert, codegen_);
2707 
2708   __ B(cond.first, non_fallthrough_target, is_far_target);
2709 
2710   if (emit_both_branches) {
2711     // No target falls through, we need to branch.
2712     __ B(false_target);
2713   }
2714 }
2715 
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl32::Label * true_target,vixl32::Label * false_target,bool far_target)2716 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2717                                                             size_t condition_input_index,
2718                                                             vixl32::Label* true_target,
2719                                                             vixl32::Label* false_target,
2720                                                             bool far_target) {
2721   HInstruction* cond = instruction->InputAt(condition_input_index);
2722 
2723   if (true_target == nullptr && false_target == nullptr) {
2724     // Nothing to do. The code always falls through.
2725     return;
2726   } else if (cond->IsIntConstant()) {
2727     // Constant condition, statically compared against "true" (integer value 1).
2728     if (cond->AsIntConstant()->IsTrue()) {
2729       if (true_target != nullptr) {
2730         __ B(true_target);
2731       }
2732     } else {
2733       DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2734       if (false_target != nullptr) {
2735         __ B(false_target);
2736       }
2737     }
2738     return;
2739   }
2740 
2741   // The following code generates these patterns:
2742   //  (1) true_target == nullptr && false_target != nullptr
2743   //        - opposite condition true => branch to false_target
2744   //  (2) true_target != nullptr && false_target == nullptr
2745   //        - condition true => branch to true_target
2746   //  (3) true_target != nullptr && false_target != nullptr
2747   //        - condition true => branch to true_target
2748   //        - branch to false_target
2749   if (IsBooleanValueOrMaterializedCondition(cond)) {
2750     // Condition has been materialized, compare the output to 0.
2751     if (kIsDebugBuild) {
2752       Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2753       DCHECK(cond_val.IsRegister());
2754     }
2755     if (true_target == nullptr) {
2756       __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2757                                 false_target,
2758                                 far_target);
2759     } else {
2760       __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2761                                    true_target,
2762                                    far_target);
2763     }
2764   } else {
2765     // Condition has not been materialized. Use its inputs as the comparison and
2766     // its condition as the branch condition.
2767     HCondition* condition = cond->AsCondition();
2768 
2769     // If this is a long or FP comparison that has been folded into
2770     // the HCondition, generate the comparison directly.
2771     DataType::Type type = condition->InputAt(0)->GetType();
2772     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2773       GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2774       return;
2775     }
2776 
2777     vixl32::Label* non_fallthrough_target;
2778     vixl32::Condition arm_cond = vixl32::Condition::None();
2779     const vixl32::Register left = InputRegisterAt(cond, 0);
2780     const Operand right = InputOperandAt(cond, 1);
2781 
2782     if (true_target == nullptr) {
2783       arm_cond = ARMCondition(condition->GetOppositeCondition());
2784       non_fallthrough_target = false_target;
2785     } else {
2786       arm_cond = ARMCondition(condition->GetCondition());
2787       non_fallthrough_target = true_target;
2788     }
2789 
2790     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2791       if (arm_cond.Is(eq)) {
2792         __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2793       } else {
2794         DCHECK(arm_cond.Is(ne));
2795         __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2796       }
2797     } else {
2798       __ Cmp(left, right);
2799       __ B(arm_cond, non_fallthrough_target, far_target);
2800     }
2801   }
2802 
2803   // If neither branch falls through (case 3), the conditional branch to `true_target`
2804   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2805   if (true_target != nullptr && false_target != nullptr) {
2806     __ B(false_target);
2807   }
2808 }
2809 
VisitIf(HIf * if_instr)2810 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
2811   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2812   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2813     locations->SetInAt(0, Location::RequiresRegister());
2814   }
2815 }
2816 
VisitIf(HIf * if_instr)2817 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
2818   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2819   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2820   vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2821       nullptr : codegen_->GetLabelOf(true_successor);
2822   vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2823       nullptr : codegen_->GetLabelOf(false_successor);
2824   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2825 }
2826 
VisitDeoptimize(HDeoptimize * deoptimize)2827 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2828   LocationSummary* locations = new (GetGraph()->GetAllocator())
2829       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2830   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2831   RegisterSet caller_saves = RegisterSet::Empty();
2832   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
2833   locations->SetCustomSlowPathCallerSaves(caller_saves);
2834   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2835     locations->SetInAt(0, Location::RequiresRegister());
2836   }
2837 }
2838 
VisitDeoptimize(HDeoptimize * deoptimize)2839 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2840   SlowPathCodeARMVIXL* slow_path =
2841       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
2842   GenerateTestAndBranch(deoptimize,
2843                         /* condition_input_index= */ 0,
2844                         slow_path->GetEntryLabel(),
2845                         /* false_target= */ nullptr);
2846 }
2847 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2848 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2849   LocationSummary* locations = new (GetGraph()->GetAllocator())
2850       LocationSummary(flag, LocationSummary::kNoCall);
2851   locations->SetOut(Location::RequiresRegister());
2852 }
2853 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2854 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2855   GetAssembler()->LoadFromOffset(kLoadWord,
2856                                  OutputRegister(flag),
2857                                  sp,
2858                                  codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2859 }
2860 
VisitSelect(HSelect * select)2861 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
2862   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2863   const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
2864 
2865   if (is_floating_point) {
2866     locations->SetInAt(0, Location::RequiresFpuRegister());
2867     locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
2868   } else {
2869     locations->SetInAt(0, Location::RequiresRegister());
2870     locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
2871   }
2872 
2873   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2874     locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
2875     // The code generator handles overlap with the values, but not with the condition.
2876     locations->SetOut(Location::SameAsFirstInput());
2877   } else if (is_floating_point) {
2878     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2879   } else {
2880     if (!locations->InAt(1).IsConstant()) {
2881       locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
2882     }
2883 
2884     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2885   }
2886 }
2887 
VisitSelect(HSelect * select)2888 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
2889   HInstruction* const condition = select->GetCondition();
2890   const LocationSummary* const locations = select->GetLocations();
2891   const DataType::Type type = select->GetType();
2892   const Location first = locations->InAt(0);
2893   const Location out = locations->Out();
2894   const Location second = locations->InAt(1);
2895 
2896   // In the unlucky case where the output of this instruction overlaps
2897   // with an input of an "emitted-at-use-site" condition, and the output
2898   // of this instruction is not one of its inputs, we have to fall back
2899   // to branches instead of conditional ARM instructions.
2900   bool output_overlaps_with_condition_inputs =
2901       !IsBooleanValueOrMaterializedCondition(condition) &&
2902       !out.Equals(first) &&
2903       !out.Equals(second) &&
2904       (condition->GetLocations()->InAt(0).Equals(out) ||
2905        condition->GetLocations()->InAt(1).Equals(out));
2906   DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition());
2907   Location src;
2908 
2909   if (condition->IsIntConstant()) {
2910     if (condition->AsIntConstant()->IsFalse()) {
2911       src = first;
2912     } else {
2913       src = second;
2914     }
2915 
2916     codegen_->MoveLocation(out, src, type);
2917     return;
2918   }
2919 
2920   if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
2921     bool invert = false;
2922 
2923     if (out.Equals(second)) {
2924       src = first;
2925       invert = true;
2926     } else if (out.Equals(first)) {
2927       src = second;
2928     } else if (second.IsConstant()) {
2929       DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
2930       src = second;
2931     } else if (first.IsConstant()) {
2932       DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
2933       src = first;
2934       invert = true;
2935     } else {
2936       src = second;
2937     }
2938 
2939     if (CanGenerateConditionalMove(out, src)) {
2940       if (!out.Equals(first) && !out.Equals(second)) {
2941         codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
2942       }
2943 
2944       std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
2945 
2946       if (IsBooleanValueOrMaterializedCondition(condition)) {
2947         __ Cmp(InputRegisterAt(select, 2), 0);
2948         cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
2949       } else {
2950         cond = GenerateTest(condition->AsCondition(), invert, codegen_);
2951       }
2952 
2953       const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
2954       // We use the scope because of the IT block that follows.
2955       ExactAssemblyScope guard(GetVIXLAssembler(),
2956                                instr_count * vixl32::k16BitT32InstructionSizeInBytes,
2957                                CodeBufferCheckScope::kExactSize);
2958 
2959       if (out.IsRegister()) {
2960         __ it(cond.first);
2961         __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
2962       } else {
2963         DCHECK(out.IsRegisterPair());
2964 
2965         Operand operand_high(0);
2966         Operand operand_low(0);
2967 
2968         if (src.IsConstant()) {
2969           const int64_t value = Int64ConstantFrom(src);
2970 
2971           operand_high = High32Bits(value);
2972           operand_low = Low32Bits(value);
2973         } else {
2974           DCHECK(src.IsRegisterPair());
2975           operand_high = HighRegisterFrom(src);
2976           operand_low = LowRegisterFrom(src);
2977         }
2978 
2979         __ it(cond.first);
2980         __ mov(cond.first, LowRegisterFrom(out), operand_low);
2981         __ it(cond.first);
2982         __ mov(cond.first, HighRegisterFrom(out), operand_high);
2983       }
2984 
2985       return;
2986     }
2987   }
2988 
2989   vixl32::Label* false_target = nullptr;
2990   vixl32::Label* true_target = nullptr;
2991   vixl32::Label select_end;
2992   vixl32::Label other_case;
2993   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
2994 
2995   if (out.Equals(second)) {
2996     true_target = target;
2997     src = first;
2998   } else {
2999     false_target = target;
3000     src = second;
3001 
3002     if (!out.Equals(first)) {
3003       if (output_overlaps_with_condition_inputs) {
3004         false_target = &other_case;
3005       } else {
3006         codegen_->MoveLocation(out, first, type);
3007       }
3008     }
3009   }
3010 
3011   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
3012   codegen_->MoveLocation(out, src, type);
3013   if (output_overlaps_with_condition_inputs) {
3014     __ B(target);
3015     __ Bind(&other_case);
3016     codegen_->MoveLocation(out, first, type);
3017   }
3018 
3019   if (select_end.IsReferenced()) {
3020     __ Bind(&select_end);
3021   }
3022 }
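// As a sketch of the conditional-move fast path above: for a select whose output register
// already holds the false value and whose condition has been materialized into a register,
// the emitted sequence is roughly
//   cmp   r_cond, #0
//   it    ne
//   movne r_out, r_true
// with a second IT/MOV pair for the high half when the result is a register pair.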
3023 
VisitNativeDebugInfo(HNativeDebugInfo * info)3024 void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3025   new (GetGraph()->GetAllocator()) LocationSummary(info);
3026 }
3027 
VisitNativeDebugInfo(HNativeDebugInfo *)3028 void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
3029   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3030 }
3031 
IncreaseFrame(size_t adjustment)3032 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3033   __ Claim(adjustment);
3034   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3035 }
3036 
DecreaseFrame(size_t adjustment)3037 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3038   __ Drop(adjustment);
3039   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3040 }
3041 
GenerateNop()3042 void CodeGeneratorARMVIXL::GenerateNop() {
3043   __ Nop();
3044 }
3045 
3046 // `temp` is an extra temporary register that is used for some conditions;
3047 // callers may not specify it, in which case the method will use a scratch
3048 // register instead.
GenerateConditionWithZero(IfCondition condition,vixl32::Register out,vixl32::Register in,vixl32::Register temp)3049 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3050                                                      vixl32::Register out,
3051                                                      vixl32::Register in,
3052                                                      vixl32::Register temp) {
3053   switch (condition) {
3054     case kCondEQ:
3055     // x <= 0 iff x == 0 when the comparison is unsigned.
3056     case kCondBE:
3057       if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3058         temp = out;
3059       }
3060 
3061       // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3062       // different as well.
3063       if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3064         // temp = - in; only 0 sets the carry flag.
3065         __ Rsbs(temp, in, 0);
3066 
3067         if (out.Is(in)) {
3068           std::swap(in, temp);
3069         }
3070 
3071         // out = - in + in + carry = carry
3072         __ Adc(out, temp, in);
3073       } else {
3074         // If `in` is 0, it has 32 leading zeros; any other value has fewer.
3075         __ Clz(out, in);
3076         // Any number less than 32 logically shifted right by 5 bits results in 0;
3077         // the same operation on 32 yields 1.
3078         __ Lsr(out, out, 5);
3079       }
3080 
3081       break;
3082     case kCondNE:
3083     // x > 0 iff x != 0 when the comparison is unsigned.
3084     case kCondA: {
3085       UseScratchRegisterScope temps(GetVIXLAssembler());
3086 
3087       if (out.Is(in)) {
3088         if (!temp.IsValid() || in.Is(temp)) {
3089           temp = temps.Acquire();
3090         }
3091       } else if (!temp.IsValid() || !temp.IsLow()) {
3092         temp = out;
3093       }
3094 
3095       // temp = in - 1; only 0 does not set the carry flag.
3096       __ Subs(temp, in, 1);
3097       // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
3098       __ Sbc(out, in, temp);
3099       break;
3100     }
3101     case kCondGE:
3102       __ Mvn(out, in);
3103       in = out;
3104       FALLTHROUGH_INTENDED;
3105     case kCondLT:
3106       // We only care about the sign bit.
3107       __ Lsr(out, in, 31);
3108       break;
3109     case kCondAE:
3110       // Trivially true.
3111       __ Mov(out, 1);
3112       break;
3113     case kCondB:
3114       // Trivially false.
3115       __ Mov(out, 0);
3116       break;
3117     default:
3118       LOG(FATAL) << "Unexpected condition " << condition;
3119       UNREACHABLE();
3120   }
3121 }
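// Worked example of the flag tricks above (a sketch, not emitted code):
//   kCondEQ, in == 0:  rsbs temp, in, #0 -> temp = 0,  C = 1;  adc out, temp, in -> 0 + 0 + 1 = 1
//   kCondEQ, in == 5:  rsbs temp, in, #0 -> temp = -5, C = 0;  adc out, temp, in -> -5 + 5 + 0 = 0
//   kCondNE, in == 0:  subs temp, in, #1 -> temp = -1, C = 0;  sbc out, in, temp -> 0 - (-1) - 1 = 0
//   kCondNE, in == 5:  subs temp, in, #1 -> temp = 4,  C = 1;  sbc out, in, temp -> 5 - 4 - 0 = 1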
3122 
HandleCondition(HCondition * cond)3123 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3124   LocationSummary* locations =
3125       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3126   const DataType::Type type = cond->InputAt(0)->GetType();
3127   if (DataType::IsFloatingPointType(type)) {
3128     locations->SetInAt(0, Location::RequiresFpuRegister());
3129     locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3130   } else {
3131     locations->SetInAt(0, Location::RequiresRegister());
3132     locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3133   }
3134   if (!cond->IsEmittedAtUseSite()) {
3135     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3136   }
3137 }
3138 
HandleCondition(HCondition * cond)3139 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3140   if (cond->IsEmittedAtUseSite()) {
3141     return;
3142   }
3143 
3144   const DataType::Type type = cond->GetLeft()->GetType();
3145 
3146   if (DataType::IsFloatingPointType(type)) {
3147     GenerateConditionGeneric(cond, codegen_);
3148     return;
3149   }
3150 
3151   DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3152 
3153   const IfCondition condition = cond->GetCondition();
3154 
3155   // A condition with only one boolean input, or with two boolean inputs compared by something
3156   // other than equality or inequality, results from transformations done by the instruction
3157   // simplifier and is handled as a regular condition with integral inputs.
3158   if (type == DataType::Type::kBool &&
3159       cond->GetRight()->GetType() == DataType::Type::kBool &&
3160       (condition == kCondEQ || condition == kCondNE)) {
3161     vixl32::Register left = InputRegisterAt(cond, 0);
3162     const vixl32::Register out = OutputRegister(cond);
3163     const Location right_loc = cond->GetLocations()->InAt(1);
3164 
3165     // The constant case is handled by the instruction simplifier.
3166     DCHECK(!right_loc.IsConstant());
3167 
3168     vixl32::Register right = RegisterFrom(right_loc);
3169 
3170     // Avoid 32-bit instructions if possible.
3171     if (out.Is(right)) {
3172       std::swap(left, right);
3173     }
3174 
3175     __ Eor(out, left, right);
3176 
3177     if (condition == kCondEQ) {
3178       __ Eor(out, out, 1);
3179     }
3180 
3181     return;
3182   }
3183 
3184   GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3185 }
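// Note on the boolean fast path above: with both inputs known to be 0 or 1,
// `eor out, left, right` directly computes left != right, and the extra
// `eor out, out, #1` flips it into left == right.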
3186 
VisitEqual(HEqual * comp)3187 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3188   HandleCondition(comp);
3189 }
3190 
VisitEqual(HEqual * comp)3191 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3192   HandleCondition(comp);
3193 }
3194 
VisitNotEqual(HNotEqual * comp)3195 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3196   HandleCondition(comp);
3197 }
3198 
VisitNotEqual(HNotEqual * comp)3199 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3200   HandleCondition(comp);
3201 }
3202 
VisitLessThan(HLessThan * comp)3203 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3204   HandleCondition(comp);
3205 }
3206 
VisitLessThan(HLessThan * comp)3207 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3208   HandleCondition(comp);
3209 }
3210 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3211 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3212   HandleCondition(comp);
3213 }
3214 
VisitLessThanOrEqual(HLessThanOrEqual * comp)3215 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3216   HandleCondition(comp);
3217 }
3218 
VisitGreaterThan(HGreaterThan * comp)3219 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3220   HandleCondition(comp);
3221 }
3222 
VisitGreaterThan(HGreaterThan * comp)3223 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3224   HandleCondition(comp);
3225 }
3226 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3227 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3228   HandleCondition(comp);
3229 }
3230 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)3231 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3232   HandleCondition(comp);
3233 }
3234 
VisitBelow(HBelow * comp)3235 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3236   HandleCondition(comp);
3237 }
3238 
VisitBelow(HBelow * comp)3239 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3240   HandleCondition(comp);
3241 }
3242 
VisitBelowOrEqual(HBelowOrEqual * comp)3243 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3244   HandleCondition(comp);
3245 }
3246 
VisitBelowOrEqual(HBelowOrEqual * comp)3247 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3248   HandleCondition(comp);
3249 }
3250 
VisitAbove(HAbove * comp)3251 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3252   HandleCondition(comp);
3253 }
3254 
VisitAbove(HAbove * comp)3255 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3256   HandleCondition(comp);
3257 }
3258 
VisitAboveOrEqual(HAboveOrEqual * comp)3259 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3260   HandleCondition(comp);
3261 }
3262 
VisitAboveOrEqual(HAboveOrEqual * comp)3263 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3264   HandleCondition(comp);
3265 }
3266 
VisitIntConstant(HIntConstant * constant)3267 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3268   LocationSummary* locations =
3269       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3270   locations->SetOut(Location::ConstantLocation(constant));
3271 }
3272 
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)3273 void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
3274   // Will be generated at use site.
3275 }
3276 
VisitNullConstant(HNullConstant * constant)3277 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3278   LocationSummary* locations =
3279       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3280   locations->SetOut(Location::ConstantLocation(constant));
3281 }
3282 
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)3283 void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
3284   // Will be generated at use site.
3285 }
3286 
VisitLongConstant(HLongConstant * constant)3287 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3288   LocationSummary* locations =
3289       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3290   locations->SetOut(Location::ConstantLocation(constant));
3291 }
3292 
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)3293 void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
3294   // Will be generated at use site.
3295 }
3296 
VisitFloatConstant(HFloatConstant * constant)3297 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3298   LocationSummary* locations =
3299       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3300   locations->SetOut(Location::ConstantLocation(constant));
3301 }
3302 
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)3303 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3304     HFloatConstant* constant ATTRIBUTE_UNUSED) {
3305   // Will be generated at use site.
3306 }
3307 
VisitDoubleConstant(HDoubleConstant * constant)3308 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3309   LocationSummary* locations =
3310       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3311   locations->SetOut(Location::ConstantLocation(constant));
3312 }
3313 
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)3314 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3315     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3316   // Will be generated at use site.
3317 }
3318 
VisitConstructorFence(HConstructorFence * constructor_fence)3319 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3320   constructor_fence->SetLocations(nullptr);
3321 }
3322 
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)3323 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3324     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
3325   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3326 }
3327 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3328 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3329   memory_barrier->SetLocations(nullptr);
3330 }
3331 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)3332 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3333   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3334 }
3335 
VisitReturnVoid(HReturnVoid * ret)3336 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3337   ret->SetLocations(nullptr);
3338 }
3339 
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)3340 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
3341   codegen_->GenerateFrameExit();
3342 }
3343 
VisitReturn(HReturn * ret)3344 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3345   LocationSummary* locations =
3346       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3347   locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3348 }
3349 
VisitReturn(HReturn * ret)3350 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3351   if (GetGraph()->IsCompilingOsr()) {
3352     // To simplify callers of an OSR method, we put the return value in both
3353     // floating point and core registers.
3354     switch (ret->InputAt(0)->GetType()) {
3355       case DataType::Type::kFloat32:
3356         __ Vmov(r0, s0);
3357         break;
3358       case DataType::Type::kFloat64:
3359         __ Vmov(r0, r1, d0);
3360         break;
3361       default:
3362         break;
3363     }
3364   }
3365   codegen_->GenerateFrameExit();
3366 }
3367 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3368 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3369   // The trampoline uses the same calling convention as the dex calling convention,
3370   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
3371   // contain the method_idx.
3372   HandleInvoke(invoke);
3373 }
3374 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)3375 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3376   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3377   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3378 }
3379 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3380 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3381   // Explicit clinit checks triggered by static invokes must have been pruned by
3382   // art::PrepareForRegisterAllocation.
3383   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3384 
3385   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3386   if (intrinsic.TryDispatch(invoke)) {
3387     return;
3388   }
3389 
3390   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3391     CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3392         /*for_register_allocation=*/ true);
3393     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3394   } else {
3395     HandleInvoke(invoke);
3396   }
3397 }
3398 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3399 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3400   if (invoke->GetLocations()->Intrinsified()) {
3401     IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3402     intrinsic.Dispatch(invoke);
3403     return true;
3404   }
3405   return false;
3406 }
3407 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)3408 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3409   // Explicit clinit checks triggered by static invokes must have been pruned by
3410   // art::PrepareForRegisterAllocation.
3411   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3412 
3413   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3414     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3415     return;
3416   }
3417 
3418   LocationSummary* locations = invoke->GetLocations();
3419   codegen_->GenerateStaticOrDirectCall(
3420       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3421 
3422   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3423 }
3424 
HandleInvoke(HInvoke * invoke)3425 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3426   InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3427   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3428 }
3429 
VisitInvokeVirtual(HInvokeVirtual * invoke)3430 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3431   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3432   if (intrinsic.TryDispatch(invoke)) {
3433     return;
3434   }
3435 
3436   HandleInvoke(invoke);
3437 }
3438 
VisitInvokeVirtual(HInvokeVirtual * invoke)3439 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3440   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3441     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3442     return;
3443   }
3444 
3445   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3446   DCHECK(!codegen_->IsLeafMethod());
3447 
3448   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3449 }
3450 
VisitInvokeInterface(HInvokeInterface * invoke)3451 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3452   HandleInvoke(invoke);
3453   // Add the hidden argument.
3454   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3455     // We cannot request r12 as it's blocked by the register allocator.
3456     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
3457   }
3458 }
3459 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,vixl32::Register klass)3460 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3461                                                          vixl32::Register klass) {
3462   DCHECK_EQ(r0.GetCode(), klass.GetCode());
3463   // We know the destination of an intrinsic, so no need to record inline
3464   // caches.
3465   if (!instruction->GetLocations()->Intrinsified() &&
3466       GetGraph()->IsCompilingBaseline() &&
3467       !Runtime::Current()->IsAotCompiler()) {
3468     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
3469     ScopedProfilingInfoUse spiu(
3470         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
3471     ProfilingInfo* info = spiu.GetProfilingInfo();
3472     if (info != nullptr) {
3473       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
3474       uint32_t address = reinterpret_cast32<uint32_t>(cache);
3475       vixl32::Label done;
3476       UseScratchRegisterScope temps(GetVIXLAssembler());
3477       temps.Exclude(ip);
3478       __ Mov(r4, address);
3479       __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3480       // Fast path for a monomorphic cache.
3481       __ Cmp(klass, ip);
3482       __ B(eq, &done, /* is_far_target= */ false);
3483       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3484       __ Bind(&done);
3485     }
3486   }
3487 }
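// For reference, the fast path emitted above for a baseline-compiled call is roughly:
//   mov r4, #<InlineCache address>
//   ldr ip, [r4, #classes_offset]
//   cmp <klass>, ip
//   beq done                       ; monomorphic hit, cache already up to date
//   <call the kQuickUpdateInlineCache entrypoint>
// done: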
3488 
VisitInvokeInterface(HInvokeInterface * invoke)3489 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3490   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3491   LocationSummary* locations = invoke->GetLocations();
3492   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3493   Location receiver = locations->InAt(0);
3494   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3495 
3496   DCHECK(!receiver.IsStackSlot());
3497 
3498   // Ensure the pc position is recorded immediately after the `ldr` instruction.
3499   {
3500     ExactAssemblyScope aas(GetVIXLAssembler(),
3501                            vixl32::kMaxInstructionSizeInBytes,
3502                            CodeBufferCheckScope::kMaximumSize);
3503     // /* HeapReference<Class> */ temp = receiver->klass_
3504     __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3505     codegen_->MaybeRecordImplicitNullCheck(invoke);
3506   }
3507   // Instead of simply (possibly) unpoisoning `temp` here, we should
3508   // emit a read barrier for the previous class reference load.
3509   // However this is not required in practice, as this is an
3510   // intermediate/temporary reference and because the current
3511   // concurrent copying collector keeps the from-space memory
3512   // intact/accessible until the end of the marking phase (though it
3513   // may not do so in the future).
3514   GetAssembler()->MaybeUnpoisonHeapReference(temp);
3515 
3516   // If we're compiling baseline, update the inline cache.
3517   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3518 
3519   GetAssembler()->LoadFromOffset(kLoadWord,
3520                                  temp,
3521                                  temp,
3522                                  mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3523 
3524   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3525       invoke->GetImtIndex(), kArmPointerSize));
3526   // temp = temp->GetImtEntryAt(method_offset);
3527   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3528   uint32_t entry_point =
3529       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3530   // LR = temp->GetEntryPoint();
3531   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3532 
3533   {
3534     // Set the hidden argument (in r12). This is done here, right before the BLX, to prevent
3535     // other instructions from clobbering it, as they might use r12 as a scratch register.
3536     Location hidden_reg = Location::RegisterLocation(r12.GetCode());
3537     // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3538     // so it checks if the application is using them (by passing them to the macro assembler
3539     // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3540     // what is available, and is the opposite of the standard usage: Instead of requesting a
3541     // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3542     // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3543     // (to materialize the constant), since the destination register becomes available for such use
3544     // internally for the duration of the macro instruction.
3545     UseScratchRegisterScope temps(GetVIXLAssembler());
3546     temps.Exclude(RegisterFrom(hidden_reg));
3547     if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3548       Location current_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
3549       if (current_method.IsStackSlot()) {
3550         GetAssembler()->LoadFromOffset(
3551             kLoadWord, RegisterFrom(hidden_reg), sp, current_method.GetStackIndex());
3552       } else {
3553         __ Mov(RegisterFrom(hidden_reg), RegisterFrom(current_method));
3554       }
3555     } else if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3556       // We pass the method from the IMT in case of a conflict. This will ensure
3557       // we go into the runtime to resolve the actual method.
3558       CHECK_NE(temp.GetCode(), lr.GetCode());
3559       __ Mov(RegisterFrom(hidden_reg), temp);
3560     } else {
3561       codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3562     }
3563   }
3564   {
3565     // Ensure the pc position is recorded immediately after the `blx` instruction.
3566     // blx in T32 has only a 16-bit encoding, hence the stricter size check for the scope.
3567     ExactAssemblyScope aas(GetVIXLAssembler(),
3568                            vixl32::k16BitT32InstructionSizeInBytes,
3569                            CodeBufferCheckScope::kExactSize);
3570     // LR();  i.e. call the entry point just loaded into LR.
3571     __ blx(lr);
3572     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3573     DCHECK(!codegen_->IsLeafMethod());
3574   }
3575 
3576   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3577 }
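// In pseudocode, the interface dispatch above is roughly:
//   temp = receiver->klass_                           // with an implicit null check
//   temp = temp->imt_                                 // ImtPtrOffset
//   temp = temp[ImTable::OffsetOfElement(imt_index)]
//   lr   = temp->entry_point_from_quick_compiled_code_
//   r12  = hidden argument (the interface method, or the IMT entry on a conflict)
//   blx lr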
3578 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3579 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3580   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3581   if (intrinsic.TryDispatch(invoke)) {
3582     return;
3583   }
3584   HandleInvoke(invoke);
3585 }
3586 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)3587 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3588   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3589     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3590     return;
3591   }
3592   codegen_->GenerateInvokePolymorphicCall(invoke);
3593   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3594 }
3595 
VisitInvokeCustom(HInvokeCustom * invoke)3596 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3597   HandleInvoke(invoke);
3598 }
3599 
VisitInvokeCustom(HInvokeCustom * invoke)3600 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3601   codegen_->GenerateInvokeCustomCall(invoke);
3602   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3603 }
3604 
VisitNeg(HNeg * neg)3605 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3606   LocationSummary* locations =
3607       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3608   switch (neg->GetResultType()) {
3609     case DataType::Type::kInt32: {
3610       locations->SetInAt(0, Location::RequiresRegister());
3611       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3612       break;
3613     }
3614     case DataType::Type::kInt64: {
3615       locations->SetInAt(0, Location::RequiresRegister());
3616       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3617       break;
3618     }
3619 
3620     case DataType::Type::kFloat32:
3621     case DataType::Type::kFloat64:
3622       locations->SetInAt(0, Location::RequiresFpuRegister());
3623       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3624       break;
3625 
3626     default:
3627       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3628   }
3629 }
3630 
VisitNeg(HNeg * neg)3631 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3632   LocationSummary* locations = neg->GetLocations();
3633   Location out = locations->Out();
3634   Location in = locations->InAt(0);
3635   switch (neg->GetResultType()) {
3636     case DataType::Type::kInt32:
3637       __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3638       break;
3639 
3640     case DataType::Type::kInt64:
3641       // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3642       __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3643       // We cannot emit an RSC (Reverse Subtract with Carry)
3644       // instruction here, as it does not exist in the Thumb-2
3645       // instruction set. We use the following sequence of
3646       // SBC and SUB instead.
3647       //
3648       // out.hi = -C
3649       __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3650       // out.hi = out.hi - in.hi
3651       __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
3652       break;
3653 
3654     case DataType::Type::kFloat32:
3655     case DataType::Type::kFloat64:
3656       __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3657       break;
3658 
3659     default:
3660       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3661   }
3662 }
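// Worked example of the 64-bit negation above (a sketch): negating in = 0x00000001'00000000
//   rsbs out.lo, in.lo, #0      -> out.lo = 0x00000000, C = 1   (in.lo was zero, no borrow)
//   sbc  out.hi, out.hi, out.hi -> out.hi = C - 1 = 0           (the old out.hi cancels itself)
//   sub  out.hi, out.hi, in.hi  -> out.hi = 0 - 1 = 0xFFFFFFFF
// giving 0xFFFFFFFF'00000000, i.e. -(1 << 32).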
3663 
VisitTypeConversion(HTypeConversion * conversion)3664 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3665   DataType::Type result_type = conversion->GetResultType();
3666   DataType::Type input_type = conversion->GetInputType();
3667   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3668       << input_type << " -> " << result_type;
3669 
3670   // The float-to-long, double-to-long and long-to-float type conversions
3671   // rely on a call to the runtime.
3672   LocationSummary::CallKind call_kind =
3673       (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3674         && result_type == DataType::Type::kInt64)
3675        || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3676       ? LocationSummary::kCallOnMainOnly
3677       : LocationSummary::kNoCall;
3678   LocationSummary* locations =
3679       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3680 
3681   switch (result_type) {
3682     case DataType::Type::kUint8:
3683     case DataType::Type::kInt8:
3684     case DataType::Type::kUint16:
3685     case DataType::Type::kInt16:
3686       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3687       locations->SetInAt(0, Location::RequiresRegister());
3688       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3689       break;
3690 
3691     case DataType::Type::kInt32:
3692       switch (input_type) {
3693         case DataType::Type::kInt64:
3694           locations->SetInAt(0, Location::Any());
3695           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3696           break;
3697 
3698         case DataType::Type::kFloat32:
3699           locations->SetInAt(0, Location::RequiresFpuRegister());
3700           locations->SetOut(Location::RequiresRegister());
3701           locations->AddTemp(Location::RequiresFpuRegister());
3702           break;
3703 
3704         case DataType::Type::kFloat64:
3705           locations->SetInAt(0, Location::RequiresFpuRegister());
3706           locations->SetOut(Location::RequiresRegister());
3707           locations->AddTemp(Location::RequiresFpuRegister());
3708           break;
3709 
3710         default:
3711           LOG(FATAL) << "Unexpected type conversion from " << input_type
3712                      << " to " << result_type;
3713       }
3714       break;
3715 
3716     case DataType::Type::kInt64:
3717       switch (input_type) {
3718         case DataType::Type::kBool:
3719         case DataType::Type::kUint8:
3720         case DataType::Type::kInt8:
3721         case DataType::Type::kUint16:
3722         case DataType::Type::kInt16:
3723         case DataType::Type::kInt32:
3724           locations->SetInAt(0, Location::RequiresRegister());
3725           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3726           break;
3727 
3728         case DataType::Type::kFloat32: {
3729           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3730           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3731           locations->SetOut(LocationFrom(r0, r1));
3732           break;
3733         }
3734 
3735         case DataType::Type::kFloat64: {
3736           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3737           locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3738                                              calling_convention.GetFpuRegisterAt(1)));
3739           locations->SetOut(LocationFrom(r0, r1));
3740           break;
3741         }
3742 
3743         default:
3744           LOG(FATAL) << "Unexpected type conversion from " << input_type
3745                      << " to " << result_type;
3746       }
3747       break;
3748 
3749     case DataType::Type::kFloat32:
3750       switch (input_type) {
3751         case DataType::Type::kBool:
3752         case DataType::Type::kUint8:
3753         case DataType::Type::kInt8:
3754         case DataType::Type::kUint16:
3755         case DataType::Type::kInt16:
3756         case DataType::Type::kInt32:
3757           locations->SetInAt(0, Location::RequiresRegister());
3758           locations->SetOut(Location::RequiresFpuRegister());
3759           break;
3760 
3761         case DataType::Type::kInt64: {
3762           InvokeRuntimeCallingConventionARMVIXL calling_convention;
3763           locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3764                                              calling_convention.GetRegisterAt(1)));
3765           locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3766           break;
3767         }
3768 
3769         case DataType::Type::kFloat64:
3770           locations->SetInAt(0, Location::RequiresFpuRegister());
3771           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3772           break;
3773 
3774         default:
3775           LOG(FATAL) << "Unexpected type conversion from " << input_type
3776                      << " to " << result_type;
3777       }
3778       break;
3779 
3780     case DataType::Type::kFloat64:
3781       switch (input_type) {
3782         case DataType::Type::kBool:
3783         case DataType::Type::kUint8:
3784         case DataType::Type::kInt8:
3785         case DataType::Type::kUint16:
3786         case DataType::Type::kInt16:
3787         case DataType::Type::kInt32:
3788           locations->SetInAt(0, Location::RequiresRegister());
3789           locations->SetOut(Location::RequiresFpuRegister());
3790           break;
3791 
3792         case DataType::Type::kInt64:
3793           locations->SetInAt(0, Location::RequiresRegister());
3794           locations->SetOut(Location::RequiresFpuRegister());
3795           locations->AddTemp(Location::RequiresFpuRegister());
3796           locations->AddTemp(Location::RequiresFpuRegister());
3797           break;
3798 
3799         case DataType::Type::kFloat32:
3800           locations->SetInAt(0, Location::RequiresFpuRegister());
3801           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3802           break;
3803 
3804         default:
3805           LOG(FATAL) << "Unexpected type conversion from " << input_type
3806                      << " to " << result_type;
3807       }
3808       break;
3809 
3810     default:
3811       LOG(FATAL) << "Unexpected type conversion from " << input_type
3812                  << " to " << result_type;
3813   }
3814 }
3815 
3816 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3817   LocationSummary* locations = conversion->GetLocations();
3818   Location out = locations->Out();
3819   Location in = locations->InAt(0);
3820   DataType::Type result_type = conversion->GetResultType();
3821   DataType::Type input_type = conversion->GetInputType();
3822   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3823       << input_type << " -> " << result_type;
3824   switch (result_type) {
3825     case DataType::Type::kUint8:
3826       switch (input_type) {
3827         case DataType::Type::kInt8:
3828         case DataType::Type::kUint16:
3829         case DataType::Type::kInt16:
3830         case DataType::Type::kInt32:
3831           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3832           break;
3833         case DataType::Type::kInt64:
3834           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
3835           break;
3836 
3837         default:
3838           LOG(FATAL) << "Unexpected type conversion from " << input_type
3839                      << " to " << result_type;
3840       }
3841       break;
3842 
3843     case DataType::Type::kInt8:
3844       switch (input_type) {
3845         case DataType::Type::kUint8:
3846         case DataType::Type::kUint16:
3847         case DataType::Type::kInt16:
3848         case DataType::Type::kInt32:
3849           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3850           break;
3851         case DataType::Type::kInt64:
3852           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
3853           break;
3854 
3855         default:
3856           LOG(FATAL) << "Unexpected type conversion from " << input_type
3857                      << " to " << result_type;
3858       }
3859       break;
3860 
3861     case DataType::Type::kUint16:
3862       switch (input_type) {
3863         case DataType::Type::kInt8:
3864         case DataType::Type::kInt16:
3865         case DataType::Type::kInt32:
3866           __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
3867           break;
3868         case DataType::Type::kInt64:
3869           __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
3870           break;
3871 
3872         default:
3873           LOG(FATAL) << "Unexpected type conversion from " << input_type
3874                      << " to " << result_type;
3875       }
3876       break;
3877 
3878     case DataType::Type::kInt16:
3879       switch (input_type) {
3880         case DataType::Type::kUint16:
3881         case DataType::Type::kInt32:
3882           __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
3883           break;
3884         case DataType::Type::kInt64:
3885           __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
3886           break;
3887 
3888         default:
3889           LOG(FATAL) << "Unexpected type conversion from " << input_type
3890                      << " to " << result_type;
3891       }
3892       break;
3893 
3894     case DataType::Type::kInt32:
3895       switch (input_type) {
3896         case DataType::Type::kInt64:
3897           DCHECK(out.IsRegister());
3898           if (in.IsRegisterPair()) {
3899             __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
3900           } else if (in.IsDoubleStackSlot()) {
3901             GetAssembler()->LoadFromOffset(kLoadWord,
3902                                            OutputRegister(conversion),
3903                                            sp,
3904                                            in.GetStackIndex());
3905           } else {
3906             DCHECK(in.IsConstant());
3907             DCHECK(in.GetConstant()->IsLongConstant());
3908             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3909             __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
3910           }
3911           break;
3912 
3913         case DataType::Type::kFloat32: {
3914           vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
3915           __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
3916           __ Vmov(OutputRegister(conversion), temp);
3917           break;
3918         }
3919 
3920         case DataType::Type::kFloat64: {
3921           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
3922           __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
3923           __ Vmov(OutputRegister(conversion), temp_s);
3924           break;
3925         }
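
        // Note: on this path VCVT to S32 rounds toward zero and saturates
        // out-of-range inputs (NaN converts to 0), which matches Java's
        // float->int and double->int narrowing rules, so no runtime call or
        // fix-up is needed here, unlike the float->long and double->long
        // conversions handled in the kInt64 case below.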
3926 
3927         default:
3928           LOG(FATAL) << "Unexpected type conversion from " << input_type
3929                      << " to " << result_type;
3930       }
3931       break;
3932 
3933     case DataType::Type::kInt64:
3934       switch (input_type) {
3935         case DataType::Type::kBool:
3936         case DataType::Type::kUint8:
3937         case DataType::Type::kInt8:
3938         case DataType::Type::kUint16:
3939         case DataType::Type::kInt16:
3940         case DataType::Type::kInt32:
3941           DCHECK(out.IsRegisterPair());
3942           DCHECK(in.IsRegister());
3943           __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
3944           // Sign extension.
3945           __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
3946           break;
3947 
3948         case DataType::Type::kFloat32:
3949           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3950           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3951           break;
3952 
3953         case DataType::Type::kFloat64:
3954           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3955           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3956           break;
3957 
3958         default:
3959           LOG(FATAL) << "Unexpected type conversion from " << input_type
3960                      << " to " << result_type;
3961       }
3962       break;
3963 
3964     case DataType::Type::kFloat32:
3965       switch (input_type) {
3966         case DataType::Type::kBool:
3967         case DataType::Type::kUint8:
3968         case DataType::Type::kInt8:
3969         case DataType::Type::kUint16:
3970         case DataType::Type::kInt16:
3971         case DataType::Type::kInt32:
3972           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
3973           __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
3974           break;
3975 
3976         case DataType::Type::kInt64:
3977           codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
3978           CheckEntrypointTypes<kQuickL2f, float, int64_t>();
3979           break;
3980 
3981         case DataType::Type::kFloat64:
3982           __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
3983           break;
3984 
3985         default:
3986           LOG(FATAL) << "Unexpected type conversion from " << input_type
3987                      << " to " << result_type;
3988       }
3989       break;
3990 
3991     case DataType::Type::kFloat64:
3992       switch (input_type) {
3993         case DataType::Type::kBool:
3994         case DataType::Type::kUint8:
3995         case DataType::Type::kInt8:
3996         case DataType::Type::kUint16:
3997         case DataType::Type::kInt16:
3998         case DataType::Type::kInt32:
3999           __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
4000           __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
4001           break;
4002 
4003         case DataType::Type::kInt64: {
4004           vixl32::Register low = LowRegisterFrom(in);
4005           vixl32::Register high = HighRegisterFrom(in);
4006           vixl32::SRegister out_s = LowSRegisterFrom(out);
4007           vixl32::DRegister out_d = DRegisterFrom(out);
4008           vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
4009           vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
4010           vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
4011 
4012           // temp_d = int-to-double(high)
4013           __ Vmov(temp_s, high);
4014           __ Vcvt(F64, S32, temp_d, temp_s);
4015           // constant_d = k2Pow32EncodingForDouble
4016           __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
4017           // out_d = unsigned-to-double(low)
4018           __ Vmov(out_s, low);
4019           __ Vcvt(F64, U32, out_d, out_s);
4020           // out_d += temp_d * constant_d
4021           __ Vmla(F64, out_d, temp_d, constant_d);
4022           break;
4023         }
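
        // The kInt64 case above converts without a runtime call by computing
        // double(hi) * 2^32 + double(unsigned lo): the multiply is exact, so
        // the VMLA addition is the only rounding step. A minimal C++ sketch
        // (illustration only; Int64ToDouble is a hypothetical helper, not part
        // of this file; arithmetic right shift assumed):
        //
        //   double Int64ToDouble(int64_t value) {
        //     int32_t hi = static_cast<int32_t>(value >> 32);
        //     uint32_t lo = static_cast<uint32_t>(value);
        //     return static_cast<double>(hi) * 4294967296.0   // 2^32, cf. k2Pow32EncodingForDouble
        //            + static_cast<double>(lo);
        //   }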
4024 
4025         case DataType::Type::kFloat32:
4026           __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
4027           break;
4028 
4029         default:
4030           LOG(FATAL) << "Unexpected type conversion from " << input_type
4031                      << " to " << result_type;
4032       }
4033       break;
4034 
4035     default:
4036       LOG(FATAL) << "Unexpected type conversion from " << input_type
4037                  << " to " << result_type;
4038   }
4039 }
4040 
4041 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
4042   LocationSummary* locations =
4043       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
4044   switch (add->GetResultType()) {
4045     case DataType::Type::kInt32: {
4046       locations->SetInAt(0, Location::RequiresRegister());
4047       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4048       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4049       break;
4050     }
4051 
4052     case DataType::Type::kInt64: {
4053       locations->SetInAt(0, Location::RequiresRegister());
4054       locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4055       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4056       break;
4057     }
4058 
4059     case DataType::Type::kFloat32:
4060     case DataType::Type::kFloat64: {
4061       locations->SetInAt(0, Location::RequiresFpuRegister());
4062       locations->SetInAt(1, Location::RequiresFpuRegister());
4063       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4064       break;
4065     }
4066 
4067     default:
4068       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4069   }
4070 }
4071 
4072 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4073   LocationSummary* locations = add->GetLocations();
4074   Location out = locations->Out();
4075   Location first = locations->InAt(0);
4076   Location second = locations->InAt(1);
4077 
4078   switch (add->GetResultType()) {
4079     case DataType::Type::kInt32: {
4080       __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4081       break;
4082     }
4083 
4084     case DataType::Type::kInt64: {
4085       if (second.IsConstant()) {
4086         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4087         GenerateAddLongConst(out, first, value);
4088       } else {
4089         DCHECK(second.IsRegisterPair());
4090         __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4091         __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4092       }
4093       break;
4094     }
4095 
4096     case DataType::Type::kFloat32:
4097     case DataType::Type::kFloat64:
4098       __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4099       break;
4100 
4101     default:
4102       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4103   }
4104 }
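
// For the register-pair path in VisitAdd above, the ADDS/ADC pair implements a 64-bit
// addition by propagating the carry from the low words into the high words. A minimal
// C++ sketch (illustration only; Add64 is a hypothetical helper, not part of this file):
//
//   uint64_t Add64(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi) {
//     uint32_t out_lo = a_lo + b_lo;                  // ADDS: C := unsigned overflow
//     uint32_t carry = (out_lo < a_lo) ? 1u : 0u;
//     uint32_t out_hi = a_hi + b_hi + carry;          // ADC consumes C
//     return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
//   }
//
// VisitSub below mirrors this with SUBS/SBC, propagating the borrow instead.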
4105 
4106 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4107   LocationSummary* locations =
4108       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4109   switch (sub->GetResultType()) {
4110     case DataType::Type::kInt32: {
4111       locations->SetInAt(0, Location::RequiresRegister());
4112       locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4113       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4114       break;
4115     }
4116 
4117     case DataType::Type::kInt64: {
4118       locations->SetInAt(0, Location::RequiresRegister());
4119       locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4120       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4121       break;
4122     }
4123     case DataType::Type::kFloat32:
4124     case DataType::Type::kFloat64: {
4125       locations->SetInAt(0, Location::RequiresFpuRegister());
4126       locations->SetInAt(1, Location::RequiresFpuRegister());
4127       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4128       break;
4129     }
4130     default:
4131       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4132   }
4133 }
4134 
4135 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4136   LocationSummary* locations = sub->GetLocations();
4137   Location out = locations->Out();
4138   Location first = locations->InAt(0);
4139   Location second = locations->InAt(1);
4140   switch (sub->GetResultType()) {
4141     case DataType::Type::kInt32: {
4142       __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4143       break;
4144     }
4145 
4146     case DataType::Type::kInt64: {
4147       if (second.IsConstant()) {
4148         uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4149         GenerateAddLongConst(out, first, -value);
4150       } else {
4151         DCHECK(second.IsRegisterPair());
4152         __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4153         __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4154       }
4155       break;
4156     }
4157 
4158     case DataType::Type::kFloat32:
4159     case DataType::Type::kFloat64:
4160       __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4161       break;
4162 
4163     default:
4164       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4165   }
4166 }
4167 
4168 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4169   LocationSummary* locations =
4170       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4171   switch (mul->GetResultType()) {
4172     case DataType::Type::kInt32:
4173     case DataType::Type::kInt64:  {
4174       locations->SetInAt(0, Location::RequiresRegister());
4175       locations->SetInAt(1, Location::RequiresRegister());
4176       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4177       break;
4178     }
4179 
4180     case DataType::Type::kFloat32:
4181     case DataType::Type::kFloat64: {
4182       locations->SetInAt(0, Location::RequiresFpuRegister());
4183       locations->SetInAt(1, Location::RequiresFpuRegister());
4184       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4185       break;
4186     }
4187 
4188     default:
4189       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4190   }
4191 }
4192 
4193 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4194   LocationSummary* locations = mul->GetLocations();
4195   Location out = locations->Out();
4196   Location first = locations->InAt(0);
4197   Location second = locations->InAt(1);
4198   switch (mul->GetResultType()) {
4199     case DataType::Type::kInt32: {
4200       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4201       break;
4202     }
4203     case DataType::Type::kInt64: {
4204       vixl32::Register out_hi = HighRegisterFrom(out);
4205       vixl32::Register out_lo = LowRegisterFrom(out);
4206       vixl32::Register in1_hi = HighRegisterFrom(first);
4207       vixl32::Register in1_lo = LowRegisterFrom(first);
4208       vixl32::Register in2_hi = HighRegisterFrom(second);
4209       vixl32::Register in2_lo = LowRegisterFrom(second);
4210 
4211       // Extra checks to protect against overlap made possible by the existence of R1_R2.
4212       // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4213       // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4214       DCHECK(!out_hi.Is(in1_lo));
4215       DCHECK(!out_hi.Is(in2_lo));
4216 
4217       // input: in1 - 64 bits, in2 - 64 bits
4218       // output: out
4219       // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4220       // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4221       // parts: out.lo = (in1.lo * in2.lo)[31:0]
4222 
4223       UseScratchRegisterScope temps(GetVIXLAssembler());
4224       vixl32::Register temp = temps.Acquire();
4225       // temp <- in1.lo * in2.hi
4226       __ Mul(temp, in1_lo, in2_hi);
4227       // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4228       __ Mla(out_hi, in1_hi, in2_lo, temp);
4229       // out.lo <- (in1.lo * in2.lo)[31:0];
4230       __ Umull(out_lo, temp, in1_lo, in2_lo);
4231       // out.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4232       __ Add(out_hi, out_hi, temp);
4233       break;
4234     }
4235 
4236     case DataType::Type::kFloat32:
4237     case DataType::Type::kFloat64:
4238       __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4239       break;
4240 
4241     default:
4242       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4243   }
4244 }
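
// The kInt64 case above follows the schoolbook decomposition spelled out in its comment.
// A minimal C++ sketch of the same arithmetic modulo 2^64 (illustration only; Mul64 is a
// hypothetical helper, not part of this file):
//
//   uint64_t Mul64(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi) {
//     uint64_t lo_product = static_cast<uint64_t>(a_lo) * b_lo;       // UMULL
//     uint32_t out_hi = a_lo * b_hi                                   // MUL
//                       + a_hi * b_lo                                 // MLA
//                       + static_cast<uint32_t>(lo_product >> 32);    // ADD high half
//     uint32_t out_lo = static_cast<uint32_t>(lo_product);
//     return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
//   }
//
// The a_hi * b_hi term is dropped because it only contributes to bits >= 64.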
4245 
4246 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4247   DCHECK(instruction->IsDiv() || instruction->IsRem());
4248   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4249 
4250   Location second = instruction->GetLocations()->InAt(1);
4251   DCHECK(second.IsConstant());
4252 
4253   vixl32::Register out = OutputRegister(instruction);
4254   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4255   int32_t imm = Int32ConstantFrom(second);
4256   DCHECK(imm == 1 || imm == -1);
4257 
4258   if (instruction->IsRem()) {
4259     __ Mov(out, 0);
4260   } else {
4261     if (imm == 1) {
4262       __ Mov(out, dividend);
4263     } else {
4264       __ Rsb(out, dividend, 0);
4265     }
4266   }
4267 }
4268 
4269 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4270   DCHECK(instruction->IsDiv() || instruction->IsRem());
4271   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4272 
4273   LocationSummary* locations = instruction->GetLocations();
4274   Location second = locations->InAt(1);
4275   DCHECK(second.IsConstant());
4276 
4277   vixl32::Register out = OutputRegister(instruction);
4278   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4279   int32_t imm = Int32ConstantFrom(second);
4280   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4281   int ctz_imm = CTZ(abs_imm);
4282 
4283   auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4284     __ Asr(out, in, ctz_imm);
4285     if (imm < 0) {
4286       __ Rsb(out, out, 0);
4287     }
4288   };
4289 
4290   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
4291     // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4292     // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4293     //   imm == 2
4294     //     HDiv
4295     //      add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4296     //      asr out, out(0x80000001), #1 => out = 0xc0000000
4297     //      This is the same as 'asr out, dividend(0x80000000), #1'
4298     //
4299     //   imm > 2
4300     //     HDiv
4301     //      asr out, dividend(0x80000000), #31 => out = -1
4302     //      add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4303     //          where the number of the rightmost 1s is ctz_imm.
4304     //      asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4305     //          leftmost 1s is ctz_imm + 1.
4306     //      This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4307     //
4308     //   imm == INT32_MIN
4309     //     HDiv
4310     //      asr out, dividend(0x80000000), #31 => out = -1
4311     //      add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4312     //      asr out, out(0xc0000000), #31 => out = -1
4313     //      rsb out, out(-1), #0 => out = 1
4314     //      This is the same as
4315     //        asr out, dividend(0x80000000), #31
4316     //        rsb out, out, #0
4317     //
4318     //
4319     //   INT32_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work
4320     //   only with bits 0..30 of the dividend. For INT32_MIN those bits are all zeros, so
4321     //   'and' and 'ubfx' always produce zero.
4322     if (instruction->IsDiv()) {
4323       generate_div_code(out, dividend);
4324     } else {
4325       if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4326         __ And(out, dividend, abs_imm - 1);
4327       } else {
4328         __ Ubfx(out, dividend, 0, ctz_imm);
4329       }
4330       return;
4331     }
4332   } else {
4333     vixl32::Register add_right_input = dividend;
4334     if (ctz_imm > 1) {
4335       __ Asr(out, dividend, 31);
4336       add_right_input = out;
4337     }
4338     __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4339 
4340     if (instruction->IsDiv()) {
4341       generate_div_code(out, out);
4342     } else {
4343       __ Bfc(out, 0, ctz_imm);
4344       __ Sub(out, dividend, out);
4345     }
4346   }
4347 }
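
// A minimal C++ sketch of the signed-division-by-2^k adjustment generated above for
// possibly-negative dividends (illustration only; DivByPowerOfTwo is a hypothetical
// helper; arithmetic right shifts and two's-complement wrap-around are assumed):
//
//   int32_t DivByPowerOfTwo(int32_t dividend, int ctz_imm /* 1..31 */) {
//     uint32_t sign = static_cast<uint32_t>(dividend >> 31);   // ASR: 0 or 0xffffffff
//     uint32_t bias = sign >> (32 - ctz_imm);                  // LSR: 0 or 2^ctz_imm - 1
//     int32_t biased = static_cast<int32_t>(static_cast<uint32_t>(dividend) + bias);
//     return biased >> ctz_imm;                                // ASR; RSB afterwards if imm < 0
//   }
//
// e.g. DivByPowerOfTwo(-5, 2) == (-5 + 3) >> 2 == -1, matching Java's -5 / 4.
// For HRem the biased value instead has its low ctz_imm bits cleared (BFC) and is
// subtracted from the dividend.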
4348 
4349 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4350   DCHECK(instruction->IsDiv() || instruction->IsRem());
4351   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4352 
4353   LocationSummary* locations = instruction->GetLocations();
4354   Location second = locations->InAt(1);
4355   DCHECK(second.IsConstant());
4356 
4357   vixl32::Register out = OutputRegister(instruction);
4358   vixl32::Register dividend = InputRegisterAt(instruction, 0);
4359   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4360   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4361   int32_t imm = Int32ConstantFrom(second);
4362 
4363   int64_t magic;
4364   int shift;
4365   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4366 
4367   auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4368                                                         vixl32::Register dividend,
4369                                                         vixl32::Register temp1,
4370                                                         vixl32::Register temp2) {
4371     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4372     __ Mov(temp1, static_cast<int32_t>(magic));
4373     if (magic > 0 && shift == 0) {
4374       __ Smull(temp2, out, dividend, temp1);
4375     } else {
4376       __ Smull(temp2, temp1, dividend, temp1);
4377       if (magic < 0) {
4378         // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4379         // than INT32_MAX. In such a case shift is never 0.
4380         // Proof:
4381         //   m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4382         //
4383         //   If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4384         //   = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4385         //   = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4386         //
4387         //   1 + (2^32 / d) is decreasing when d is increasing.
4388         //   The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4389         //   The minimum is 3, when d == 2^31 - 1.
4390         //   So for all values of d in [3, INT32_MAX], m with p == 32 is in [3, INT32_MAX) and
4391         //   is never less than 0.
4392         __ Add(temp1, temp1, dividend);
4393       }
4394       DCHECK_NE(shift, 0);
4395       __ Lsr(out, temp1, shift);
4396     }
4397   };
4398 
4399   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
4400     // No need to adjust the result for a non-negative dividend and a positive divisor.
4401     if (instruction->IsDiv()) {
4402       generate_unsigned_div_code(out, dividend, temp1, temp2);
4403     } else {
4404       generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4405       __ Mov(temp2, imm);
4406       __ Mls(out, temp1, temp2, dividend);
4407     }
4408   } else {
4409     // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4410     __ Mov(temp1, static_cast<int32_t>(magic));
4411     __ Smull(temp2, temp1, dividend, temp1);
4412 
4413     if (imm > 0 && magic < 0) {
4414       __ Add(temp1, temp1, dividend);
4415     } else if (imm < 0 && magic > 0) {
4416       __ Sub(temp1, temp1, dividend);
4417     }
4418 
4419     if (shift != 0) {
4420       __ Asr(temp1, temp1, shift);
4421     }
4422 
4423     if (instruction->IsDiv()) {
4424       __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4425     } else {
4426       __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4427       // TODO: Strength reduction for mls.
4428       __ Mov(temp2, imm);
4429       __ Mls(out, temp1, temp2, dividend);
4430     }
4431   }
4432 }
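
// A minimal C++ sketch of the signed magic-number path above (illustration only;
// MagicDiv is a hypothetical helper, not part of this file; magic and shift come from
// CalculateMagicAndShiftForDivRem, and arithmetic right shifts are assumed):
//
//   int32_t MagicDiv(int32_t dividend, int32_t imm, int32_t magic, int shift) {
//     int64_t product = static_cast<int64_t>(dividend) * magic;   // SMULL
//     int32_t q = static_cast<int32_t>(product >> 32);            // high 32 bits
//     if (imm > 0 && magic < 0) q += dividend;                    // ADD fix-up
//     if (imm < 0 && magic > 0) q -= dividend;                    // SUB fix-up
//     q >>= shift;                                                // ASR
//     return q - (q >> 31);                                       // round toward zero
//   }
//
// For HRem the quotient is multiplied back and subtracted: rem = dividend - q * imm
// (the MLS above). The non-negative-dividend fast path uses LSR for the shift and
// omits the final sign fix-up.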
4433 
4434 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4435     HBinaryOperation* instruction) {
4436   DCHECK(instruction->IsDiv() || instruction->IsRem());
4437   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4438 
4439   Location second = instruction->GetLocations()->InAt(1);
4440   DCHECK(second.IsConstant());
4441 
4442   int32_t imm = Int32ConstantFrom(second);
4443   if (imm == 0) {
4444     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4445   } else if (imm == 1 || imm == -1) {
4446     DivRemOneOrMinusOne(instruction);
4447   } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4448     DivRemByPowerOfTwo(instruction);
4449   } else {
4450     DCHECK(imm <= -2 || imm >= 2);
4451     GenerateDivRemWithAnyConstant(instruction);
4452   }
4453 }
4454 
4455 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4456   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4457   if (div->GetResultType() == DataType::Type::kInt64) {
4458     // pLdiv runtime call.
4459     call_kind = LocationSummary::kCallOnMainOnly;
4460   } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4461     // sdiv will be replaced by another instruction sequence.
4462   } else if (div->GetResultType() == DataType::Type::kInt32 &&
4463              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4464     // pIdivmod runtime call.
4465     call_kind = LocationSummary::kCallOnMainOnly;
4466   }
4467 
4468   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4469 
4470   switch (div->GetResultType()) {
4471     case DataType::Type::kInt32: {
4472       if (div->InputAt(1)->IsConstant()) {
4473         locations->SetInAt(0, Location::RequiresRegister());
4474         locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
4475         int32_t value = Int32ConstantFrom(div->InputAt(1));
4476         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4477         if (value == 1 || value == 0 || value == -1) {
4478           // No temp register required.
4479         } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4480                    value != 2 &&
4481                    value != -2 &&
4482                    !HasNonNegativeOrMinIntInputAt(div, 0)) {
4483           // The "out" register is used as a temporary, so it overlaps with the inputs.
4484           out_overlaps = Location::kOutputOverlap;
4485         } else {
4486           locations->AddRegisterTemps(2);
4487         }
4488         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4489       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4490         locations->SetInAt(0, Location::RequiresRegister());
4491         locations->SetInAt(1, Location::RequiresRegister());
4492         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4493       } else {
4494         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4495         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4496         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4497         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4498         //       we only need the former.
4499         locations->SetOut(LocationFrom(r0));
4500       }
4501       break;
4502     }
4503     case DataType::Type::kInt64: {
4504       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4505       locations->SetInAt(0, LocationFrom(
4506           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4507       locations->SetInAt(1, LocationFrom(
4508           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4509       locations->SetOut(LocationFrom(r0, r1));
4510       break;
4511     }
4512     case DataType::Type::kFloat32:
4513     case DataType::Type::kFloat64: {
4514       locations->SetInAt(0, Location::RequiresFpuRegister());
4515       locations->SetInAt(1, Location::RequiresFpuRegister());
4516       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4517       break;
4518     }
4519 
4520     default:
4521       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4522   }
4523 }
4524 
4525 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4526   Location lhs = div->GetLocations()->InAt(0);
4527   Location rhs = div->GetLocations()->InAt(1);
4528 
4529   switch (div->GetResultType()) {
4530     case DataType::Type::kInt32: {
4531       if (rhs.IsConstant()) {
4532         GenerateDivRemConstantIntegral(div);
4533       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4534         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4535       } else {
4536         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4537         DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4538         DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4539         DCHECK(r0.Is(OutputRegister(div)));
4540 
4541         codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4542         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4543       }
4544       break;
4545     }
4546 
4547     case DataType::Type::kInt64: {
4548       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4549       DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4550       DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4551       DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4552       DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4553       DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4554       DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4555 
4556       codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4557       CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4558       break;
4559     }
4560 
4561     case DataType::Type::kFloat32:
4562     case DataType::Type::kFloat64:
4563       __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4564       break;
4565 
4566     default:
4567       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4568   }
4569 }
4570 
4571 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4572   DataType::Type type = rem->GetResultType();
4573 
4574   // Most remainders are implemented in the runtime.
4575   LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4576   if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4577     // sdiv will be replaced by another instruction sequence.
4578     call_kind = LocationSummary::kNoCall;
4579   } else if ((rem->GetResultType() == DataType::Type::kInt32)
4580              && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4581     // Have a hardware divide instruction for int; compute the remainder inline (SDIV + MLS).
4582     call_kind = LocationSummary::kNoCall;
4583   }
4584 
4585   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4586 
4587   switch (type) {
4588     case DataType::Type::kInt32: {
4589       if (rem->InputAt(1)->IsConstant()) {
4590         locations->SetInAt(0, Location::RequiresRegister());
4591         locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
4592         int32_t value = Int32ConstantFrom(rem->InputAt(1));
4593         Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4594         if (value == 1 || value == 0 || value == -1) {
4595           // No temp register required.
4596         } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeOrMinIntInputAt(rem, 0)) {
4597           // The "out" register is used as a temporary, so it overlaps with the inputs.
4598           out_overlaps = Location::kOutputOverlap;
4599         } else {
4600           locations->AddRegisterTemps(2);
4601         }
4602         locations->SetOut(Location::RequiresRegister(), out_overlaps);
4603       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4604         locations->SetInAt(0, Location::RequiresRegister());
4605         locations->SetInAt(1, Location::RequiresRegister());
4606         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4607         locations->AddTemp(Location::RequiresRegister());
4608       } else {
4609         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4610         locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4611         locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4612         // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4613         //       we only need the latter.
4614         locations->SetOut(LocationFrom(r1));
4615       }
4616       break;
4617     }
4618     case DataType::Type::kInt64: {
4619       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4620       locations->SetInAt(0, LocationFrom(
4621           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4622       locations->SetInAt(1, LocationFrom(
4623           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4624       // The runtime helper puts the output in R2,R3.
4625       locations->SetOut(LocationFrom(r2, r3));
4626       break;
4627     }
4628     case DataType::Type::kFloat32: {
4629       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4630       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4631       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4632       locations->SetOut(LocationFrom(s0));
4633       break;
4634     }
4635 
4636     case DataType::Type::kFloat64: {
4637       InvokeRuntimeCallingConventionARMVIXL calling_convention;
4638       locations->SetInAt(0, LocationFrom(
4639           calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4640       locations->SetInAt(1, LocationFrom(
4641           calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4642       locations->SetOut(LocationFrom(s0, s1));
4643       break;
4644     }
4645 
4646     default:
4647       LOG(FATAL) << "Unexpected rem type " << type;
4648   }
4649 }
4650 
4651 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4652   LocationSummary* locations = rem->GetLocations();
4653   Location second = locations->InAt(1);
4654 
4655   DataType::Type type = rem->GetResultType();
4656   switch (type) {
4657     case DataType::Type::kInt32: {
4658       vixl32::Register reg1 = InputRegisterAt(rem, 0);
4659       vixl32::Register out_reg = OutputRegister(rem);
4660       if (second.IsConstant()) {
4661         GenerateDivRemConstantIntegral(rem);
4662       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4663         vixl32::Register reg2 = RegisterFrom(second);
4664         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4665 
4666         // temp = reg1 / reg2  (integer division)
4667         // dest = reg1 - temp * reg2
4668         __ Sdiv(temp, reg1, reg2);
4669         __ Mls(out_reg, temp, reg2, reg1);
4670       } else {
4671         InvokeRuntimeCallingConventionARMVIXL calling_convention;
4672         DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4673         DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4674         DCHECK(out_reg.Is(r1));
4675 
4676         codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4677         CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4678       }
4679       break;
4680     }
4681 
4682     case DataType::Type::kInt64: {
4683       codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4684       CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4685       break;
4686     }
4687 
4688     case DataType::Type::kFloat32: {
4689       codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4690       CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4691       break;
4692     }
4693 
4694     case DataType::Type::kFloat64: {
4695       codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4696       CheckEntrypointTypes<kQuickFmod, double, double, double>();
4697       break;
4698     }
4699 
4700     default:
4701       LOG(FATAL) << "Unexpected rem type " << type;
4702   }
4703 }
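
// For the hardware-divide path in VisitRem above, the remainder follows the Java
// definition for truncated division, rem = dividend - (dividend / divisor) * divisor,
// computed with SDIV followed by MLS. A worked example (illustration only):
//
//   -7 / 3  -> SDIV truncates toward zero, giving -2
//   -7 - (-2 * 3) == -1, which matches Java's -7 % 3.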
4704 
4705 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4706   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4707   switch (minmax->GetResultType()) {
4708     case DataType::Type::kInt32:
4709       locations->SetInAt(0, Location::RequiresRegister());
4710       locations->SetInAt(1, Location::RequiresRegister());
4711       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4712       break;
4713     case DataType::Type::kInt64:
4714       locations->SetInAt(0, Location::RequiresRegister());
4715       locations->SetInAt(1, Location::RequiresRegister());
4716       locations->SetOut(Location::SameAsFirstInput());
4717       break;
4718     case DataType::Type::kFloat32:
4719       locations->SetInAt(0, Location::RequiresFpuRegister());
4720       locations->SetInAt(1, Location::RequiresFpuRegister());
4721       locations->SetOut(Location::SameAsFirstInput());
4722       locations->AddTemp(Location::RequiresRegister());
4723       break;
4724     case DataType::Type::kFloat64:
4725       locations->SetInAt(0, Location::RequiresFpuRegister());
4726       locations->SetInAt(1, Location::RequiresFpuRegister());
4727       locations->SetOut(Location::SameAsFirstInput());
4728       break;
4729     default:
4730       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4731   }
4732 }
4733 
4734 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4735   Location op1_loc = locations->InAt(0);
4736   Location op2_loc = locations->InAt(1);
4737   Location out_loc = locations->Out();
4738 
4739   vixl32::Register op1 = RegisterFrom(op1_loc);
4740   vixl32::Register op2 = RegisterFrom(op2_loc);
4741   vixl32::Register out = RegisterFrom(out_loc);
4742 
4743   __ Cmp(op1, op2);
4744 
4745   {
4746     ExactAssemblyScope aas(GetVIXLAssembler(),
4747                            3 * kMaxInstructionSizeInBytes,
4748                            CodeBufferCheckScope::kMaximumSize);
4749 
4750     __ ite(is_min ? lt : gt);
4751     __ mov(is_min ? lt : gt, out, op1);
4752     __ mov(is_min ? ge : le, out, op2);
4753   }
4754 }
4755 
4756 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4757   Location op1_loc = locations->InAt(0);
4758   Location op2_loc = locations->InAt(1);
4759   Location out_loc = locations->Out();
4760 
4761   // Optimization: don't generate any code if inputs are the same.
4762   if (op1_loc.Equals(op2_loc)) {
4763     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
4764     return;
4765   }
4766 
4767   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
4768   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
4769   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
4770   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
4771   vixl32::Register out_lo = LowRegisterFrom(out_loc);
4772   vixl32::Register out_hi = HighRegisterFrom(out_loc);
4773   UseScratchRegisterScope temps(GetVIXLAssembler());
4774   const vixl32::Register temp = temps.Acquire();
4775 
4776   DCHECK(op1_lo.Is(out_lo));
4777   DCHECK(op1_hi.Is(out_hi));
4778 
4779   // Compare op1 >= op2, or op1 < op2.
4780   __ Cmp(out_lo, op2_lo);
4781   __ Sbcs(temp, out_hi, op2_hi);
4782 
4783   // Now GE/LT condition code is correct for the long comparison.
4784   {
4785     vixl32::ConditionType cond = is_min ? ge : lt;
4786     ExactAssemblyScope it_scope(GetVIXLAssembler(),
4787                                 3 * kMaxInstructionSizeInBytes,
4788                                 CodeBufferCheckScope::kMaximumSize);
4789     __ itt(cond);
4790     __ mov(cond, out_lo, op2_lo);
4791     __ mov(cond, out_hi, op2_hi);
4792   }
4793 }
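
// Note on the CMP/SBCS pair above: it performs the 64-bit subtraction op1 - op2 for its
// effect on the flags (the low-word result is discarded, the borrow is carried into the
// high words), so the signed GE/LT conditions are valid for the long comparison, while
// EQ/NE are not, since Z only reflects the high-word result. A minimal C++ sketch of the
// comparison it implements (illustration only; SignedLess64 is a hypothetical helper):
//
//   bool SignedLess64(int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo) {
//     uint32_t borrow = (a_lo < b_lo) ? 1u : 0u;                     // from CMP on the low words
//     int64_t hi_diff = static_cast<int64_t>(a_hi) - b_hi - borrow;  // SBCS on the high words
//     return hi_diff < 0;                                            // the LT condition
//   }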
4794 
4795 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
4796   LocationSummary* locations = minmax->GetLocations();
4797   Location op1_loc = locations->InAt(0);
4798   Location op2_loc = locations->InAt(1);
4799   Location out_loc = locations->Out();
4800 
4801   // Optimization: don't generate any code if inputs are the same.
4802   if (op1_loc.Equals(op2_loc)) {
4803     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
4804     return;
4805   }
4806 
4807   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
4808   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
4809   vixl32::SRegister out = SRegisterFrom(out_loc);
4810 
4811   UseScratchRegisterScope temps(GetVIXLAssembler());
4812   const vixl32::Register temp1 = temps.Acquire();
4813   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
4814   vixl32::Label nan, done;
4815   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
4816 
4817   DCHECK(op1.Is(out));
4818 
4819   __ Vcmp(op1, op2);
4820   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
4821   __ B(vs, &nan, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
4822 
4823   // op1 <> op2
4824   vixl32::ConditionType cond = is_min ? gt : lt;
4825   {
4826     ExactAssemblyScope it_scope(GetVIXLAssembler(),
4827                                 2 * kMaxInstructionSizeInBytes,
4828                                 CodeBufferCheckScope::kMaximumSize);
4829     __ it(cond);
4830     __ vmov(cond, F32, out, op2);
4831   }
4832   // For op1 != op2 (the not-equal case), the min/max has already been computed above.
4833   __ B(ne, final_label, /* is_far_target= */ false);
4834 
4835   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
4836   __ Vmov(temp1, op1);
4837   __ Vmov(temp2, op2);
4838   if (is_min) {
4839     __ Orr(temp1, temp1, temp2);
4840   } else {
4841     __ And(temp1, temp1, temp2);
4842   }
4843   __ Vmov(out, temp1);
4844   __ B(final_label);
4845 
4846   // handle NaN input.
4847   __ Bind(&nan);
4848   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
4849   __ Vmov(out, temp1);
4850 
4851   if (done.IsReferenced()) {
4852     __ Bind(&done);
4853   }
4854 }
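
// The equal-inputs path above resolves min(+0.0f, -0.0f) and max(+0.0f, -0.0f) purely
// with integer operations on the raw encodings: the sign is bit 31, so ORR keeps a
// negative zero (the min) and AND keeps a positive zero (the max). Illustration only:
//
//   0x00000000 | 0x80000000 == 0x80000000   // +0.0f ORR -0.0f -> -0.0f == min
//   0x00000000 & 0x80000000 == 0x00000000   // +0.0f AND -0.0f -> +0.0f == max
//
// GenerateMinMaxDouble below uses the same idea with VORR/VAND on D registers.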
4855 
4856 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
4857   LocationSummary* locations = minmax->GetLocations();
4858   Location op1_loc = locations->InAt(0);
4859   Location op2_loc = locations->InAt(1);
4860   Location out_loc = locations->Out();
4861 
4862   // Optimization: don't generate any code if inputs are the same.
4863   if (op1_loc.Equals(op2_loc)) {
4864     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
4865     return;
4866   }
4867 
4868   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
4869   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
4870   vixl32::DRegister out = DRegisterFrom(out_loc);
4871   vixl32::Label handle_nan_eq, done;
4872   vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
4873 
4874   DCHECK(op1.Is(out));
4875 
4876   __ Vcmp(op1, op2);
4877   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
4878   __ B(vs, &handle_nan_eq, /* is_far_target= */ false);  // if un-ordered, go to NaN handling.
4879 
4880   // op1 <> op2
4881   vixl32::ConditionType cond = is_min ? gt : lt;
4882   {
4883     ExactAssemblyScope it_scope(GetVIXLAssembler(),
4884                                 2 * kMaxInstructionSizeInBytes,
4885                                 CodeBufferCheckScope::kMaximumSize);
4886     __ it(cond);
4887     __ vmov(cond, F64, out, op2);
4888   }
4889   // For op1 != op2 (the not-equal case), the min/max has already been computed above.
4890   __ B(ne, final_label, /* is_far_target= */ false);
4891 
4892   // handle op1 == op2, max(+0.0,-0.0).
4893   if (!is_min) {
4894     __ Vand(F64, out, op1, op2);
4895     __ B(final_label);
4896   }
4897 
4898   // handle op1 == op2, min(+0.0,-0.0), NaN input.
4899   __ Bind(&handle_nan_eq);
4900   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
4901 
4902   if (done.IsReferenced()) {
4903     __ Bind(&done);
4904   }
4905 }
4906 
4907 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4908   DataType::Type type = minmax->GetResultType();
4909   switch (type) {
4910     case DataType::Type::kInt32:
4911       GenerateMinMaxInt(minmax->GetLocations(), is_min);
4912       break;
4913     case DataType::Type::kInt64:
4914       GenerateMinMaxLong(minmax->GetLocations(), is_min);
4915       break;
4916     case DataType::Type::kFloat32:
4917       GenerateMinMaxFloat(minmax, is_min);
4918       break;
4919     case DataType::Type::kFloat64:
4920       GenerateMinMaxDouble(minmax, is_min);
4921       break;
4922     default:
4923       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4924   }
4925 }
4926 
4927 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
4928   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4929 }
4930 
4931 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
4932   GenerateMinMax(min, /*is_min*/ true);
4933 }
4934 
4935 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
4936   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4937 }
4938 
4939 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
4940   GenerateMinMax(max, /*is_min*/ false);
4941 }
4942 
4943 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
4944   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4945   switch (abs->GetResultType()) {
4946     case DataType::Type::kInt32:
4947     case DataType::Type::kInt64:
4948       locations->SetInAt(0, Location::RequiresRegister());
4949       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4950       locations->AddTemp(Location::RequiresRegister());
4951       break;
4952     case DataType::Type::kFloat32:
4953     case DataType::Type::kFloat64:
4954       locations->SetInAt(0, Location::RequiresFpuRegister());
4955       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4956       break;
4957     default:
4958       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
4959   }
4960 }
4961 
4962 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
4963   LocationSummary* locations = abs->GetLocations();
4964   switch (abs->GetResultType()) {
4965     case DataType::Type::kInt32: {
4966       vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
4967       vixl32::Register out_reg = RegisterFrom(locations->Out());
4968       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
4969       __ Asr(mask, in_reg, 31);
4970       __ Add(out_reg, in_reg, mask);
4971       __ Eor(out_reg, out_reg, mask);
4972       break;
4973     }
4974     case DataType::Type::kInt64: {
4975       Location in = locations->InAt(0);
4976       vixl32::Register in_reg_lo = LowRegisterFrom(in);
4977       vixl32::Register in_reg_hi = HighRegisterFrom(in);
4978       Location output = locations->Out();
4979       vixl32::Register out_reg_lo = LowRegisterFrom(output);
4980       vixl32::Register out_reg_hi = HighRegisterFrom(output);
4981       DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
4982       vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
4983       __ Asr(mask, in_reg_hi, 31);
4984       __ Adds(out_reg_lo, in_reg_lo, mask);
4985       __ Adc(out_reg_hi, in_reg_hi, mask);
4986       __ Eor(out_reg_lo, out_reg_lo, mask);
4987       __ Eor(out_reg_hi, out_reg_hi, mask);
4988       break;
4989     }
4990     case DataType::Type::kFloat32:
4991     case DataType::Type::kFloat64:
4992       __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
4993       break;
4994     default:
4995       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
4996   }
4997 }
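
// The kInt32 case above uses the classic branch-free absolute value: with
// mask = x >> 31 (0 for non-negative x, -1 for negative x), (x + mask) ^ mask equals
// x for x >= 0 and -x otherwise (INT32_MIN wraps to itself, matching Math.abs).
// A minimal C++ sketch (illustration only; Abs32 is a hypothetical helper; arithmetic
// right shift assumed):
//
//   int32_t Abs32(int32_t x) {
//     uint32_t mask = static_cast<uint32_t>(x >> 31);                         // ASR: 0 or ~0u
//     return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);  // ADD, EOR
//   }
//
// The kInt64 case extends this with ADDS/ADC so the +mask propagates a carry across the
// register pair before both halves are EORed with the mask.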
4998 
4999 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5000   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5001   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5002 }
5003 
5004 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
5005   DivZeroCheckSlowPathARMVIXL* slow_path =
5006       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
5007   codegen_->AddSlowPath(slow_path);
5008 
5009   LocationSummary* locations = instruction->GetLocations();
5010   Location value = locations->InAt(0);
5011 
5012   switch (instruction->GetType()) {
5013     case DataType::Type::kBool:
5014     case DataType::Type::kUint8:
5015     case DataType::Type::kInt8:
5016     case DataType::Type::kUint16:
5017     case DataType::Type::kInt16:
5018     case DataType::Type::kInt32: {
5019       if (value.IsRegister()) {
5020         __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
5021       } else {
5022         DCHECK(value.IsConstant()) << value;
5023         if (Int32ConstantFrom(value) == 0) {
5024           __ B(slow_path->GetEntryLabel());
5025         }
5026       }
5027       break;
5028     }
5029     case DataType::Type::kInt64: {
5030       if (value.IsRegisterPair()) {
5031         UseScratchRegisterScope temps(GetVIXLAssembler());
5032         vixl32::Register temp = temps.Acquire();
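        // Orrs of the two halves sets the Z flag only if the full 64-bit value is zero.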
5033         __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
5034         __ B(eq, slow_path->GetEntryLabel());
5035       } else {
5036         DCHECK(value.IsConstant()) << value;
5037         if (Int64ConstantFrom(value) == 0) {
5038           __ B(slow_path->GetEntryLabel());
5039         }
5040       }
5041       break;
5042     }
5043     default:
5044       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
5045   }
5046 }
5047 
5048 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
5049   LocationSummary* locations = ror->GetLocations();
5050   vixl32::Register in = InputRegisterAt(ror, 0);
5051   Location rhs = locations->InAt(1);
5052   vixl32::Register out = OutputRegister(ror);
5053 
5054   if (rhs.IsConstant()) {
5055     // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5056     // so map all rotations to a positive equivalent in that range.
5057     // (e.g. a rotation by -2 bits, left *or* right, equals 30 bits in the same direction.)
5058     uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
5059     if (rot) {
5060       // Rotate, mapping left rotations to right equivalents if necessary.
5061       // (e.g. left by 2 bits == right by 30.)
5062       __ Ror(out, in, rot);
5063     } else if (!out.Is(in)) {
5064       __ Mov(out, in);
5065     }
5066   } else {
5067     __ Ror(out, in, RegisterFrom(rhs));
5068   }
5069 }
5070 
5071 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5072 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5073 // a larger rotation) or flipping direction (thus treating larger right/left
5074 // rotations as sub-word sized rotations in the other direction) as appropriate.
5075 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
5076   LocationSummary* locations = ror->GetLocations();
5077   vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5078   vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5079   Location rhs = locations->InAt(1);
5080   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5081   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5082 
5083   if (rhs.IsConstant()) {
5084     uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5085     // Map all rotations to positive equivalents on the interval [0,63].
5086     rot &= kMaxLongShiftDistance;
5087     // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
5088     // logic below to a simple pair of binary orr.
5089     // (e.g. 34 bits == in_reg swap + 2 bits right.)
5090     if (rot >= kArmBitsPerWord) {
5091       rot -= kArmBitsPerWord;
5092       std::swap(in_reg_hi, in_reg_lo);
5093     }
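    // At this point rot is in [0,32) and any 32-bit component of the rotation has already
    // been handled by the register swap above.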
5094     // Rotate, or mov to out for zero or word size rotations.
5095     if (rot != 0u) {
5096       __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5097       __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5098       __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5099       __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5100     } else {
5101       __ Mov(out_reg_lo, in_reg_lo);
5102       __ Mov(out_reg_hi, in_reg_hi);
5103     }
5104   } else {
5105     vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5106     vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5107     vixl32::Label end;
5108     vixl32::Label shift_by_32_plus_shift_right;
5109     vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
5110 
5111     __ And(shift_right, RegisterFrom(rhs), 0x1F);
5112     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5113     __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5114     __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5115 
5116     // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5117     // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5118     __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5119     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5120     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5121     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5122     __ Lsr(shift_left, in_reg_hi, shift_right);
5123     __ Add(out_reg_lo, out_reg_lo, shift_left);
5124     __ B(final_label);
5125 
5126     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
5127     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5128     // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5129     __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5130     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5131     __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5132     __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5133     __ Lsl(shift_right, in_reg_hi, shift_left);
5134     __ Add(out_reg_lo, out_reg_lo, shift_right);
5135 
5136     if (end.IsReferenced()) {
5137       __ Bind(&end);
5138     }
5139   }
5140 }
5141 
5142 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5143   LocationSummary* locations =
5144       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5145   switch (ror->GetResultType()) {
5146     case DataType::Type::kInt32: {
5147       locations->SetInAt(0, Location::RequiresRegister());
5148       locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
5149       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5150       break;
5151     }
5152     case DataType::Type::kInt64: {
5153       locations->SetInAt(0, Location::RequiresRegister());
5154       if (ror->InputAt(1)->IsConstant()) {
5155         locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
5156       } else {
5157         locations->SetInAt(1, Location::RequiresRegister());
5158         locations->AddTemp(Location::RequiresRegister());
5159         locations->AddTemp(Location::RequiresRegister());
5160       }
5161       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5162       break;
5163     }
5164     default:
5165       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5166   }
5167 }
5168 
5169 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5170   DataType::Type type = ror->GetResultType();
5171   switch (type) {
5172     case DataType::Type::kInt32: {
5173       HandleIntegerRotate(ror);
5174       break;
5175     }
5176     case DataType::Type::kInt64: {
5177       HandleLongRotate(ror);
5178       break;
5179     }
5180     default:
5181       LOG(FATAL) << "Unexpected operation type " << type;
5182       UNREACHABLE();
5183   }
5184 }
5185 
5186 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5187   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5188 
5189   LocationSummary* locations =
5190       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5191 
5192   switch (op->GetResultType()) {
5193     case DataType::Type::kInt32: {
5194       locations->SetInAt(0, Location::RequiresRegister());
5195       if (op->InputAt(1)->IsConstant()) {
5196         locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
5197         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5198       } else {
5199         locations->SetInAt(1, Location::RequiresRegister());
5200         // Make the output overlap, as it will be used to hold the masked
5201         // second input.
5202         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5203       }
5204       break;
5205     }
5206     case DataType::Type::kInt64: {
5207       locations->SetInAt(0, Location::RequiresRegister());
5208       if (op->InputAt(1)->IsConstant()) {
5209         locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
5210         // For simplicity, use kOutputOverlap even though we only require that low registers
5211         // don't clash with high registers which the register allocator currently guarantees.
5212         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5213       } else {
5214         locations->SetInAt(1, Location::RequiresRegister());
5215         locations->AddTemp(Location::RequiresRegister());
5216         locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5217       }
5218       break;
5219     }
5220     default:
5221       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5222   }
5223 }
5224 
5225 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5226   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5227 
5228   LocationSummary* locations = op->GetLocations();
5229   Location out = locations->Out();
5230   Location first = locations->InAt(0);
5231   Location second = locations->InAt(1);
5232 
5233   DataType::Type type = op->GetResultType();
5234   switch (type) {
5235     case DataType::Type::kInt32: {
5236       vixl32::Register out_reg = OutputRegister(op);
5237       vixl32::Register first_reg = InputRegisterAt(op, 0);
5238       if (second.IsRegister()) {
5239         vixl32::Register second_reg = RegisterFrom(second);
5240         // ARM doesn't mask the shift count so we need to do it ourselves.
5241         __ And(out_reg, second_reg, kMaxIntShiftDistance);
5242         if (op->IsShl()) {
5243           __ Lsl(out_reg, first_reg, out_reg);
5244         } else if (op->IsShr()) {
5245           __ Asr(out_reg, first_reg, out_reg);
5246         } else {
5247           __ Lsr(out_reg, first_reg, out_reg);
5248         }
5249       } else {
5250         int32_t cst = Int32ConstantFrom(second);
5251         uint32_t shift_value = cst & kMaxIntShiftDistance;
5252         if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
5253           __ Mov(out_reg, first_reg);
5254         } else if (op->IsShl()) {
5255           __ Lsl(out_reg, first_reg, shift_value);
5256         } else if (op->IsShr()) {
5257           __ Asr(out_reg, first_reg, shift_value);
5258         } else {
5259           __ Lsr(out_reg, first_reg, shift_value);
5260         }
5261       }
5262       break;
5263     }
5264     case DataType::Type::kInt64: {
5265       vixl32::Register o_h = HighRegisterFrom(out);
5266       vixl32::Register o_l = LowRegisterFrom(out);
5267 
5268       vixl32::Register high = HighRegisterFrom(first);
5269       vixl32::Register low = LowRegisterFrom(first);
5270 
5271       if (second.IsRegister()) {
5272         vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5273 
5274         vixl32::Register second_reg = RegisterFrom(second);
5275 
5276         if (op->IsShl()) {
5277           __ And(o_l, second_reg, kMaxLongShiftDistance);
5278           // Shift the high part
5279           __ Lsl(o_h, high, o_l);
5280           // Shift the low part and `or` what overflew on the high part
5281           __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5282           __ Lsr(temp, low, temp);
5283           __ Orr(o_h, o_h, temp);
5284           // If the shift is > 32 bits, override the high part
5285           __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
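          // temp = shift - 32; when that is non-negative (pl) the whole low word ends up in
          // the high word, so the predicated lsl below sets o_h = low << (shift - 32).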
5286           {
5287             ExactAssemblyScope guard(GetVIXLAssembler(),
5288                                      2 * vixl32::kMaxInstructionSizeInBytes,
5289                                      CodeBufferCheckScope::kMaximumSize);
5290             __ it(pl);
5291             __ lsl(pl, o_h, low, temp);
5292           }
5293           // Shift the low part
5294           __ Lsl(o_l, low, o_l);
5295         } else if (op->IsShr()) {
5296           __ And(o_h, second_reg, kMaxLongShiftDistance);
5297           // Shift the low part
5298           __ Lsr(o_l, low, o_h);
5299           // Shift the high part and `or` what underflew on the low part
5300           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5301           __ Lsl(temp, high, temp);
5302           __ Orr(o_l, o_l, temp);
5303           // If the shift is > 32 bits, override the low part
5304           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5305           {
5306             ExactAssemblyScope guard(GetVIXLAssembler(),
5307                                      2 * vixl32::kMaxInstructionSizeInBytes,
5308                                      CodeBufferCheckScope::kMaximumSize);
5309             __ it(pl);
5310             __ asr(pl, o_l, high, temp);
5311           }
5312           // Shift the high part
5313           __ Asr(o_h, high, o_h);
5314         } else {
5315           __ And(o_h, second_reg, kMaxLongShiftDistance);
5316           // Same as Shr, except we use `Lsr` (logical shifts) instead of `Asr`.
5317           __ Lsr(o_l, low, o_h);
5318           __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5319           __ Lsl(temp, high, temp);
5320           __ Orr(o_l, o_l, temp);
5321           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5322           {
5323             ExactAssemblyScope guard(GetVIXLAssembler(),
5324                                      2 * vixl32::kMaxInstructionSizeInBytes,
5325                                      CodeBufferCheckScope::kMaximumSize);
5326             __ it(pl);
5327             __ lsr(pl, o_l, high, temp);
5328           }
5329           __ Lsr(o_h, high, o_h);
5330         }
5331       } else {
5332         // Register allocator doesn't create partial overlap.
5333         DCHECK(!o_l.Is(high));
5334         DCHECK(!o_h.Is(low));
5335         int32_t cst = Int32ConstantFrom(second);
5336         uint32_t shift_value = cst & kMaxLongShiftDistance;
5337         if (shift_value > 32) {
5338           if (op->IsShl()) {
5339             __ Lsl(o_h, low, shift_value - 32);
5340             __ Mov(o_l, 0);
5341           } else if (op->IsShr()) {
5342             __ Asr(o_l, high, shift_value - 32);
5343             __ Asr(o_h, high, 31);
5344           } else {
5345             __ Lsr(o_l, high, shift_value - 32);
5346             __ Mov(o_h, 0);
5347           }
5348         } else if (shift_value == 32) {
5349           if (op->IsShl()) {
5350             __ Mov(o_h, low);
5351             __ Mov(o_l, 0);
5352           } else if (op->IsShr()) {
5353             __ Mov(o_l, high);
5354             __ Asr(o_h, high, 31);
5355           } else {
5356             __ Mov(o_l, high);
5357             __ Mov(o_h, 0);
5358           }
5359         } else if (shift_value == 1) {
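          // A long shift by a single bit can use the carry flag: the bit shifted out of one
          // word is carried into the other via Adc (shift left) or Rrx (shift right).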
5360           if (op->IsShl()) {
5361             __ Lsls(o_l, low, 1);
5362             __ Adc(o_h, high, high);
5363           } else if (op->IsShr()) {
5364             __ Asrs(o_h, high, 1);
5365             __ Rrx(o_l, low);
5366           } else {
5367             __ Lsrs(o_h, high, 1);
5368             __ Rrx(o_l, low);
5369           }
5370         } else if (shift_value == 0) {
5371           __ Mov(o_l, low);
5372           __ Mov(o_h, high);
5373         } else {
5374           DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5375           if (op->IsShl()) {
5376             __ Lsl(o_h, high, shift_value);
5377             __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5378             __ Lsl(o_l, low, shift_value);
5379           } else if (op->IsShr()) {
5380             __ Lsr(o_l, low, shift_value);
5381             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5382             __ Asr(o_h, high, shift_value);
5383           } else {
5384             __ Lsr(o_l, low, shift_value);
5385             __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5386             __ Lsr(o_h, high, shift_value);
5387           }
5388         }
5389       }
5390       break;
5391     }
5392     default:
5393       LOG(FATAL) << "Unexpected operation type " << type;
5394       UNREACHABLE();
5395   }
5396 }
5397 
5398 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5399   HandleShift(shl);
5400 }
5401 
5402 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5403   HandleShift(shl);
5404 }
5405 
5406 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5407   HandleShift(shr);
5408 }
5409 
5410 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5411   HandleShift(shr);
5412 }
5413 
5414 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5415   HandleShift(ushr);
5416 }
5417 
5418 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5419   HandleShift(ushr);
5420 }
5421 
5422 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5423   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5424       instruction, LocationSummary::kCallOnMainOnly);
5425   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5426   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5427   locations->SetOut(LocationFrom(r0));
5428 }
5429 
5430 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5431   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5432   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5433   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5434 }
5435 
5436 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5437   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5438       instruction, LocationSummary::kCallOnMainOnly);
5439   InvokeRuntimeCallingConventionARMVIXL calling_convention;
5440   locations->SetOut(LocationFrom(r0));
5441   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5442   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5443 }
5444 
5445 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5446   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5447   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5448   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5449   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5450   DCHECK(!codegen_->IsLeafMethod());
5451   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5452 }
5453 
5454 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5455   LocationSummary* locations =
5456       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5457   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5458   if (location.IsStackSlot()) {
5459     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5460   } else if (location.IsDoubleStackSlot()) {
5461     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5462   }
5463   locations->SetOut(location);
5464 }
5465 
5466 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5467     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5468   // Nothing to do, the parameter is already at its location.
5469 }
5470 
5471 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5472   LocationSummary* locations =
5473       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5474   locations->SetOut(LocationFrom(kMethodRegister));
5475 }
5476 
5477 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5478     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5479   // Nothing to do, the method is already at its location.
5480 }
5481 
5482 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5483   LocationSummary* locations =
5484       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5485   locations->SetInAt(0, Location::RequiresRegister());
5486   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5487 }
5488 
5489 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5490   LocationSummary* locations = not_->GetLocations();
5491   Location out = locations->Out();
5492   Location in = locations->InAt(0);
5493   switch (not_->GetResultType()) {
5494     case DataType::Type::kInt32:
5495       __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5496       break;
5497 
5498     case DataType::Type::kInt64:
5499       __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5500       __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5501       break;
5502 
5503     default:
5504       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5505   }
5506 }
5507 
5508 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5509   LocationSummary* locations =
5510       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5511   locations->SetInAt(0, Location::RequiresRegister());
5512   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5513 }
5514 
5515 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5516   __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5517 }
5518 
5519 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5520   LocationSummary* locations =
5521       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5522   switch (compare->InputAt(0)->GetType()) {
5523     case DataType::Type::kBool:
5524     case DataType::Type::kUint8:
5525     case DataType::Type::kInt8:
5526     case DataType::Type::kUint16:
5527     case DataType::Type::kInt16:
5528     case DataType::Type::kInt32:
5529     case DataType::Type::kInt64: {
5530       locations->SetInAt(0, Location::RequiresRegister());
5531       locations->SetInAt(1, Location::RequiresRegister());
5532       // Output overlaps because it is written before doing the low comparison.
5533       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5534       break;
5535     }
5536     case DataType::Type::kFloat32:
5537     case DataType::Type::kFloat64: {
5538       locations->SetInAt(0, Location::RequiresFpuRegister());
5539       locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5540       locations->SetOut(Location::RequiresRegister());
5541       break;
5542     }
5543     default:
5544       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5545   }
5546 }
5547 
5548 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5549   LocationSummary* locations = compare->GetLocations();
5550   vixl32::Register out = OutputRegister(compare);
5551   Location left = locations->InAt(0);
5552   Location right = locations->InAt(1);
5553 
5554   vixl32::Label less, greater, done;
5555   vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5556   DataType::Type type = compare->InputAt(0)->GetType();
5557   vixl32::Condition less_cond = vixl32::Condition::None();
5558   switch (type) {
5559     case DataType::Type::kBool:
5560     case DataType::Type::kUint8:
5561     case DataType::Type::kInt8:
5562     case DataType::Type::kUint16:
5563     case DataType::Type::kInt16:
5564     case DataType::Type::kInt32: {
5565       // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5566       __ Mov(out, 0);
5567       __ Cmp(RegisterFrom(left), RegisterFrom(right));  // Signed compare.
5568       less_cond = lt;
5569       break;
5570     }
5571     case DataType::Type::kInt64: {
5572       __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // Signed compare.
5573       __ B(lt, &less, /* is_far_target= */ false);
5574       __ B(gt, &greater, /* is_far_target= */ false);
5575       // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5576       __ Mov(out, 0);
5577       __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
5578       less_cond = lo;
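      // Compare the low words as unsigned: they hold the low 32 bits of a two's complement
      // 64-bit value, so only the high-word comparison is signed.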
5579       break;
5580     }
5581     case DataType::Type::kFloat32:
5582     case DataType::Type::kFloat64: {
5583       __ Mov(out, 0);
5584       GenerateVcmp(compare, codegen_);
5585       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5586       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5587       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5588       break;
5589     }
5590     default:
5591       LOG(FATAL) << "Unexpected compare type " << type;
5592       UNREACHABLE();
5593   }
5594 
5595   __ B(eq, final_label, /* is_far_target= */ false);
5596   __ B(less_cond, &less, /* is_far_target= */ false);
5597 
5598   __ Bind(&greater);
5599   __ Mov(out, 1);
5600   __ B(final_label);
5601 
5602   __ Bind(&less);
5603   __ Mov(out, -1);
5604 
5605   if (done.IsReferenced()) {
5606     __ Bind(&done);
5607   }
5608 }
5609 
5610 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5611   LocationSummary* locations =
5612       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5613   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5614     locations->SetInAt(i, Location::Any());
5615   }
5616   locations->SetOut(Location::Any());
5617 }
5618 
5619 void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5620   LOG(FATAL) << "Unreachable";
5621 }
5622 
5623 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5624   // TODO (ported from quick): revisit ARM barrier kinds.
5625   DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
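  // DMB ISH is a full data memory barrier over the inner shareable domain, covering the
  // kAnyStore, kLoadAny and kAnyAny orderings; DMB ISHST only orders stores against later
  // stores, which is all kStoreStore requires.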
5626   switch (kind) {
5627     case MemBarrierKind::kAnyStore:
5628     case MemBarrierKind::kLoadAny:
5629     case MemBarrierKind::kAnyAny: {
5630       flavor = DmbOptions::ISH;
5631       break;
5632     }
5633     case MemBarrierKind::kStoreStore: {
5634       flavor = DmbOptions::ISHST;
5635       break;
5636     }
5637     default:
5638       LOG(FATAL) << "Unexpected memory barrier " << kind;
5639   }
5640   __ Dmb(flavor);
5641 }
5642 
5643 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5644                                                              uint32_t offset,
5645                                                              vixl32::Register out_lo,
5646                                                              vixl32::Register out_hi) {
5647   UseScratchRegisterScope temps(GetVIXLAssembler());
5648   if (offset != 0) {
5649     vixl32::Register temp = temps.Acquire();
5650     __ Add(temp, addr, offset);
5651     addr = temp;
5652   }
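  // An aligned ldrexd is architecturally a single-copy atomic 64-bit load on ARMv7, so a
  // plain exclusive load is enough here; the exclusive state it sets is never paired with
  // a store-exclusive.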
5653   __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5654 }
5655 
5656 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5657                                                               uint32_t offset,
5658                                                               vixl32::Register value_lo,
5659                                                               vixl32::Register value_hi,
5660                                                               vixl32::Register temp1,
5661                                                               vixl32::Register temp2,
5662                                                               HInstruction* instruction) {
5663   UseScratchRegisterScope temps(GetVIXLAssembler());
5664   vixl32::Label fail;
5665   if (offset != 0) {
5666     vixl32::Register temp = temps.Acquire();
5667     __ Add(temp, addr, offset);
5668     addr = temp;
5669   }
5670   __ Bind(&fail);
5671   {
5672     // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5673     ExactAssemblyScope aas(GetVIXLAssembler(),
5674                            vixl32::kMaxInstructionSizeInBytes,
5675                            CodeBufferCheckScope::kMaximumSize);
5676     // We need a load followed by store. (The address used in a STREX instruction must
5677     // be the same as the address in the most recently executed LDREX instruction.)
5678     __ ldrexd(temp1, temp2, MemOperand(addr));
5679     codegen_->MaybeRecordImplicitNullCheck(instruction);
5680   }
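  // Strexd writes 0 to temp1 on success and a non-zero value if the exclusive monitor was
  // lost (e.g. another agent wrote the location since the ldrexd above); in that case we
  // loop back to `fail` and retry, which makes the 64-bit store atomic.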
5681   __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5682   __ CompareAndBranchIfNonZero(temp1, &fail);
5683 }
5684 
5685 void LocationsBuilderARMVIXL::HandleFieldSet(
5686     HInstruction* instruction, const FieldInfo& field_info) {
5687   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5688 
5689   LocationSummary* locations =
5690       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5691   locations->SetInAt(0, Location::RequiresRegister());
5692 
5693   DataType::Type field_type = field_info.GetFieldType();
5694   if (DataType::IsFloatingPointType(field_type)) {
5695     locations->SetInAt(1, Location::RequiresFpuRegister());
5696   } else {
5697     locations->SetInAt(1, Location::RequiresRegister());
5698   }
5699 
5700   bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5701   bool generate_volatile = field_info.IsVolatile()
5702       && is_wide
5703       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5704   bool needs_write_barrier =
5705       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5706   // Temporary registers for the write barrier.
5707   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5708   if (needs_write_barrier) {
5709     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
5710     locations->AddTemp(Location::RequiresRegister());
5711   } else if (generate_volatile) {
5712     // The ARM encoding has some additional constraints for ldrexd/strexd:
5713     // - registers need to be consecutive
5714     // - the first register should be even but not R14.
5715     // We don't test for ARM yet, and the assertion makes sure that we
5716     // revisit this if we ever enable ARM encoding.
5717     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5718 
5719     locations->AddTemp(Location::RequiresRegister());
5720     locations->AddTemp(Location::RequiresRegister());
5721     if (field_type == DataType::Type::kFloat64) {
5722       // For doubles we need two more registers to copy the value.
5723       locations->AddTemp(LocationFrom(r2));
5724       locations->AddTemp(LocationFrom(r3));
5725     }
5726   }
5727 }
5728 
5729 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
5730                                                      const FieldInfo& field_info,
5731                                                      bool value_can_be_null) {
5732   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5733 
5734   LocationSummary* locations = instruction->GetLocations();
5735   vixl32::Register base = InputRegisterAt(instruction, 0);
5736   Location value = locations->InAt(1);
5737   std::optional<vixl::aarch32::Label> pred_is_null;
5738 
5739   bool is_predicated =
5740       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5741   bool is_volatile = field_info.IsVolatile();
5742   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5743   DataType::Type field_type = field_info.GetFieldType();
5744   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5745   bool needs_write_barrier =
5746       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5747 
5748   if (is_predicated) {
5749     pred_is_null.emplace();
5750     __ CompareAndBranchIfZero(base, &*pred_is_null, /* is_far_target= */ false);
5751   }
5752 
5753   if (is_volatile) {
5754     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5755   }
5756 
5757   switch (field_type) {
5758     case DataType::Type::kBool:
5759     case DataType::Type::kUint8:
5760     case DataType::Type::kInt8:
5761     case DataType::Type::kUint16:
5762     case DataType::Type::kInt16:
5763     case DataType::Type::kInt32: {
5764       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5765       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5766       StoreOperandType operand_type = GetStoreOperandType(field_type);
5767       GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
5768       codegen_->MaybeRecordImplicitNullCheck(instruction);
5769       break;
5770     }
5771 
5772     case DataType::Type::kReference: {
5773       vixl32::Register value_reg = RegisterFrom(value);
5774       if (kPoisonHeapReferences && needs_write_barrier) {
5775         // Note that in the case where `value` is a null reference,
5776         // we do not enter this block, as a null reference does not
5777         // need poisoning.
5778         DCHECK_EQ(field_type, DataType::Type::kReference);
5779         value_reg = RegisterFrom(locations->GetTemp(0));
5780         __ Mov(value_reg, RegisterFrom(value));
5781         GetAssembler()->PoisonHeapReference(value_reg);
5782       }
5783       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5784       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5785       GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
5786       codegen_->MaybeRecordImplicitNullCheck(instruction);
5787       break;
5788     }
5789 
5790     case DataType::Type::kInt64: {
5791       if (is_volatile && !atomic_ldrd_strd) {
5792         GenerateWideAtomicStore(base,
5793                                 offset,
5794                                 LowRegisterFrom(value),
5795                                 HighRegisterFrom(value),
5796                                 RegisterFrom(locations->GetTemp(0)),
5797                                 RegisterFrom(locations->GetTemp(1)),
5798                                 instruction);
5799       } else {
5800         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5801         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5802         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
5803         codegen_->MaybeRecordImplicitNullCheck(instruction);
5804       }
5805       break;
5806     }
5807 
5808     case DataType::Type::kFloat32: {
5809       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5810       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5811       GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
5812       codegen_->MaybeRecordImplicitNullCheck(instruction);
5813       break;
5814     }
5815 
5816     case DataType::Type::kFloat64: {
5817       vixl32::DRegister value_reg = DRegisterFrom(value);
5818       if (is_volatile && !atomic_ldrd_strd) {
5819         vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
5820         vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
5821 
5822         __ Vmov(value_reg_lo, value_reg_hi, value_reg);
5823 
5824         GenerateWideAtomicStore(base,
5825                                 offset,
5826                                 value_reg_lo,
5827                                 value_reg_hi,
5828                                 RegisterFrom(locations->GetTemp(2)),
5829                                 RegisterFrom(locations->GetTemp(3)),
5830                                 instruction);
5831       } else {
5832         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5833         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5834         GetAssembler()->StoreDToOffset(value_reg, base, offset);
5835         codegen_->MaybeRecordImplicitNullCheck(instruction);
5836       }
5837       break;
5838     }
5839 
5840     case DataType::Type::kUint32:
5841     case DataType::Type::kUint64:
5842     case DataType::Type::kVoid:
5843       LOG(FATAL) << "Unreachable type " << field_type;
5844       UNREACHABLE();
5845   }
5846 
5847   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5848     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5849     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
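    // Mark the GC card covering `base` so the GC rescans this object for the newly stored
    // reference; MarkGCCard skips the marking for a null value when `value_can_be_null` is set.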
5850     codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
5851   }
5852 
5853   if (is_volatile) {
5854     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5855   }
5856 
5857   if (is_predicated) {
5858     __ Bind(&*pred_is_null);
5859   }
5860 }
5861 
5862 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
5863                                              const FieldInfo& field_info) {
5864   DCHECK(instruction->IsInstanceFieldGet() ||
5865          instruction->IsStaticFieldGet() ||
5866          instruction->IsPredicatedInstanceFieldGet());
5867 
5868   bool object_field_get_with_read_barrier =
5869       kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
5870   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5871   LocationSummary* locations =
5872       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5873                                                        object_field_get_with_read_barrier
5874                                                            ? LocationSummary::kCallOnSlowPath
5875                                                            : LocationSummary::kNoCall);
5876   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5877     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5878   }
5879   // Input for object receiver.
5880   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5881 
5882   bool volatile_for_double = field_info.IsVolatile()
5883       && (field_info.GetFieldType() == DataType::Type::kFloat64)
5884       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5885   // The output overlaps in case of volatile long: we don't want the
5886   // code generated by GenerateWideAtomicLoad to overwrite the
5887   // object's location.  Likewise, in the case of an object field get
5888   // with read barriers enabled, we do not want the load to overwrite
5889   // the object's location, as we need it to emit the read barrier.
5890   bool overlap =
5891       (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
5892       object_field_get_with_read_barrier;
5893 
5894   if (DataType::IsFloatingPointType(instruction->GetType())) {
5895     if (is_predicated) {
5896       locations->SetInAt(0, Location::RequiresFpuRegister());
5897       locations->SetOut(Location::SameAsFirstInput());
5898     } else {
5899       locations->SetOut(Location::RequiresFpuRegister());
5900     }
5901   } else {
5902     if (is_predicated) {
5903       locations->SetInAt(0, Location::RequiresRegister());
5904       locations->SetOut(Location::SameAsFirstInput());
5905     } else {
5906       locations->SetOut(Location::RequiresRegister(),
5907                         (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
5908     }
5909   }
5910   if (volatile_for_double) {
5911     // The ARM encoding has some additional constraints for ldrexd/strexd:
5912     // - registers need to be consecutive
5913     // - the first register should be even but not R14.
5914     // We don't test for ARM yet, and the assertion makes sure that we
5915     // revisit this if we ever enable ARM encoding.
5916     DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5917     locations->AddTemp(Location::RequiresRegister());
5918     locations->AddTemp(Location::RequiresRegister());
5919   } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5920     // We need a temporary register for the read barrier load in
5921     // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
5922     // only if the offset is too big.
5923     if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
5924       locations->AddTemp(Location::RequiresRegister());
5925     }
5926   }
5927 }
5928 
5929 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
5930   DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
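  // An arithmetic zero (+0.0 or -0.0) can typically be compared with `vcmp <reg>, #0.0`,
  // so keep it as a constant location instead of requiring an FPU register.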
5931   if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
5932       (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
5933     return Location::ConstantLocation(input->AsConstant());
5934   } else {
5935     return Location::RequiresFpuRegister();
5936   }
5937 }
5938 
5939 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
5940                                                                  Opcode opcode) {
5941   DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
5942   if (constant->IsConstant() &&
5943       CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
5944     return Location::ConstantLocation(constant->AsConstant());
5945   }
5946   return Location::RequiresRegister();
5947 }
5948 
5949 static bool CanEncode32BitConstantAsImmediate(
5950     CodeGeneratorARMVIXL* codegen,
5951     uint32_t value,
5952     Opcode opcode,
5953     vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
5954   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5955   if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
5956     return true;
5957   }
5958   Opcode neg_opcode = kNoOperand;
5959   uint32_t neg_value = 0;
5960   switch (opcode) {
5961     case AND: neg_opcode = BIC; neg_value = ~value; break;
5962     case ORR: neg_opcode = ORN; neg_value = ~value; break;
5963     case ADD: neg_opcode = SUB; neg_value = -value; break;
5964     case ADC: neg_opcode = SBC; neg_value = ~value; break;
5965     case SUB: neg_opcode = ADD; neg_value = -value; break;
5966     case SBC: neg_opcode = ADC; neg_value = ~value; break;
5967     case MOV: neg_opcode = MVN; neg_value = ~value; break;
5968     default:
5969       return false;
5970   }
5971 
5972   if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
5973     return true;
5974   }
5975 
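  // An AND with a contiguous low-bit mask (i.e. value + 1 is a power of two) can be lowered
  // without an immediate encoding (e.g. as a ubfx or a shift pair), so report it as encodable.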
5976   return opcode == AND && IsPowerOfTwo(value + 1);
5977 }
5978 
5979 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
5980   uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
5981   if (DataType::Is64BitType(input_cst->GetType())) {
5982     Opcode high_opcode = opcode;
5983     vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
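    // For a 64-bit ADD (and SUB, which is flipped below to an ADD of the negated constant),
    // the backend emits a flags-setting add on the low words followed by ADC on the high
    // words, so the low immediate must be encodable in a flags-setting form and the high
    // immediate in ADC form; other opcodes simply check the two halves independently.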
5984     switch (opcode) {
5985       case SUB:
5986         // Flip the operation to an ADD.
5987         value = -value;
5988         opcode = ADD;
5989         FALLTHROUGH_INTENDED;
5990       case ADD:
5991         if (Low32Bits(value) == 0u) {
5992           return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
5993         }
5994         high_opcode = ADC;
5995         low_flags_update = vixl32::FlagsUpdate::SetFlags;
5996         break;
5997       default:
5998         break;
5999     }
6000     return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
6001            CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
6002   } else {
6003     return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
6004   }
6005 }
6006 
6007 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
6008                                                      const FieldInfo& field_info) {
6009   DCHECK(instruction->IsInstanceFieldGet() ||
6010          instruction->IsStaticFieldGet() ||
6011          instruction->IsPredicatedInstanceFieldGet());
6012 
6013   LocationSummary* locations = instruction->GetLocations();
6014   uint32_t receiver_input = instruction->IsPredicatedInstanceFieldGet() ? 1 : 0;
6015   vixl32::Register base = InputRegisterAt(instruction, receiver_input);
6016   Location out = locations->Out();
6017   bool is_volatile = field_info.IsVolatile();
6018   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
6019   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6020   DataType::Type load_type = instruction->GetType();
6021   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6022 
6023   switch (load_type) {
6024     case DataType::Type::kBool:
6025     case DataType::Type::kUint8:
6026     case DataType::Type::kInt8:
6027     case DataType::Type::kUint16:
6028     case DataType::Type::kInt16:
6029     case DataType::Type::kInt32: {
6030       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6031       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6032       LoadOperandType operand_type = GetLoadOperandType(load_type);
6033       GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
6034       codegen_->MaybeRecordImplicitNullCheck(instruction);
6035       break;
6036     }
6037 
6038     case DataType::Type::kReference: {
6039       // /* HeapReference<Object> */ out = *(base + offset)
6040       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6041         Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6042         // Note that a potential implicit null check is handled in this
6043         // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
6044         codegen_->GenerateFieldLoadWithBakerReadBarrier(
6045             instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
6046         if (is_volatile) {
6047           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6048         }
6049       } else {
6050         {
6051           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6052           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6053           GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
6054           codegen_->MaybeRecordImplicitNullCheck(instruction);
6055         }
6056         if (is_volatile) {
6057           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6058         }
6059         // If read barriers are enabled, emit read barriers other than
6060         // Baker's using a slow path (and also unpoison the loaded
6061         // reference, if heap poisoning is enabled).
6062         codegen_->MaybeGenerateReadBarrierSlow(
6063             instruction, out, out, locations->InAt(receiver_input), offset);
6064       }
6065       break;
6066     }
6067 
6068     case DataType::Type::kInt64: {
6069       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6070       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6071       if (is_volatile && !atomic_ldrd_strd) {
6072         GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
6073       } else {
6074         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
6075       }
6076       codegen_->MaybeRecordImplicitNullCheck(instruction);
6077       break;
6078     }
6079 
6080     case DataType::Type::kFloat32: {
6081       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6082       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6083       GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6084       codegen_->MaybeRecordImplicitNullCheck(instruction);
6085       break;
6086     }
6087 
6088     case DataType::Type::kFloat64: {
6089       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6090       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6091       vixl32::DRegister out_dreg = DRegisterFrom(out);
6092       if (is_volatile && !atomic_ldrd_strd) {
6093         vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6094         vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6095         GenerateWideAtomicLoad(base, offset, lo, hi);
6096         codegen_->MaybeRecordImplicitNullCheck(instruction);
6097         __ Vmov(out_dreg, lo, hi);
6098       } else {
6099         GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6100         codegen_->MaybeRecordImplicitNullCheck(instruction);
6101       }
6102       break;
6103     }
6104 
6105     case DataType::Type::kUint32:
6106     case DataType::Type::kUint64:
6107     case DataType::Type::kVoid:
6108       LOG(FATAL) << "Unreachable type " << load_type;
6109       UNREACHABLE();
6110   }
6111 
6112   if (is_volatile) {
6113     if (load_type == DataType::Type::kReference) {
6114       // Memory barriers, in the case of references, are also handled
6115       // in the previous switch statement.
6116     } else {
6117       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6118     }
6119   }
6120 }
6121 
6122 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6123   HandleFieldSet(instruction, instruction->GetFieldInfo());
6124 }
6125 
6126 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6127   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6128 }
6129 
6130 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6131   HandleFieldGet(instruction, instruction->GetFieldInfo());
6132 }
6133 
6134 void LocationsBuilderARMVIXL::VisitPredicatedInstanceFieldGet(
6135     HPredicatedInstanceFieldGet* instruction) {
6136   HandleFieldGet(instruction, instruction->GetFieldInfo());
6137 }
6138 
6139 void InstructionCodeGeneratorARMVIXL::VisitPredicatedInstanceFieldGet(
6140     HPredicatedInstanceFieldGet* instruction) {
6141   vixl::aarch32::Label finish;
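  // Input 1 is the receiver and input 0 the default value, whose register is shared with
  // the output; if the receiver is null, skip the load so the default value is kept.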
6142   __ CompareAndBranchIfZero(InputRegisterAt(instruction, 1), &finish, /* is_far_target= */ false);
6143   HandleFieldGet(instruction, instruction->GetFieldInfo());
6144   __ Bind(&finish);
6145 }
6146 
6147 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6148   HandleFieldGet(instruction, instruction->GetFieldInfo());
6149 }
6150 
6151 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6152   HandleFieldGet(instruction, instruction->GetFieldInfo());
6153 }
6154 
6155 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6156   HandleFieldGet(instruction, instruction->GetFieldInfo());
6157 }
6158 
6159 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6160   HandleFieldSet(instruction, instruction->GetFieldInfo());
6161 }
6162 
6163 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6164   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6165 }
6166 
6167 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6168   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6169 }
6170 
6171 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6172   __ Mov(r0, instruction->GetFormat()->GetValue());
6173   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6174 }
6175 
6176 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6177     HUnresolvedInstanceFieldGet* instruction) {
6178   FieldAccessCallingConventionARMVIXL calling_convention;
6179   codegen_->CreateUnresolvedFieldLocationSummary(
6180       instruction, instruction->GetFieldType(), calling_convention);
6181 }
6182 
6183 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6184     HUnresolvedInstanceFieldGet* instruction) {
6185   FieldAccessCallingConventionARMVIXL calling_convention;
6186   codegen_->GenerateUnresolvedFieldAccess(instruction,
6187                                           instruction->GetFieldType(),
6188                                           instruction->GetFieldIndex(),
6189                                           instruction->GetDexPc(),
6190                                           calling_convention);
6191 }
6192 
6193 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6194     HUnresolvedInstanceFieldSet* instruction) {
6195   FieldAccessCallingConventionARMVIXL calling_convention;
6196   codegen_->CreateUnresolvedFieldLocationSummary(
6197       instruction, instruction->GetFieldType(), calling_convention);
6198 }
6199 
6200 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6201     HUnresolvedInstanceFieldSet* instruction) {
6202   FieldAccessCallingConventionARMVIXL calling_convention;
6203   codegen_->GenerateUnresolvedFieldAccess(instruction,
6204                                           instruction->GetFieldType(),
6205                                           instruction->GetFieldIndex(),
6206                                           instruction->GetDexPc(),
6207                                           calling_convention);
6208 }
6209 
6210 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6211     HUnresolvedStaticFieldGet* instruction) {
6212   FieldAccessCallingConventionARMVIXL calling_convention;
6213   codegen_->CreateUnresolvedFieldLocationSummary(
6214       instruction, instruction->GetFieldType(), calling_convention);
6215 }
6216 
6217 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6218     HUnresolvedStaticFieldGet* instruction) {
6219   FieldAccessCallingConventionARMVIXL calling_convention;
6220   codegen_->GenerateUnresolvedFieldAccess(instruction,
6221                                           instruction->GetFieldType(),
6222                                           instruction->GetFieldIndex(),
6223                                           instruction->GetDexPc(),
6224                                           calling_convention);
6225 }
6226 
6227 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6228     HUnresolvedStaticFieldSet* instruction) {
6229   FieldAccessCallingConventionARMVIXL calling_convention;
6230   codegen_->CreateUnresolvedFieldLocationSummary(
6231       instruction, instruction->GetFieldType(), calling_convention);
6232 }
6233 
6234 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6235     HUnresolvedStaticFieldSet* instruction) {
6236   FieldAccessCallingConventionARMVIXL calling_convention;
6237   codegen_->GenerateUnresolvedFieldAccess(instruction,
6238                                           instruction->GetFieldType(),
6239                                           instruction->GetFieldIndex(),
6240                                           instruction->GetDexPc(),
6241                                           calling_convention);
6242 }
6243 
6244 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6245   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6246   locations->SetInAt(0, Location::RequiresRegister());
6247 }
6248 
6249 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6250   if (CanMoveNullCheckToUser(instruction)) {
6251     return;
6252   }
6253 
6254   UseScratchRegisterScope temps(GetVIXLAssembler());
6255   // Ensure the pc position is recorded immediately after the `ldr` instruction.
6256   ExactAssemblyScope aas(GetVIXLAssembler(),
6257                          vixl32::kMaxInstructionSizeInBytes,
6258                          CodeBufferCheckScope::kMaximumSize);
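  // The load below dereferences the object register; a null reference faults, and the
  // runtime's fault handler uses the pc recorded right after the `ldr` to raise the
  // NullPointerException, so no explicit compare is emitted here.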
6259   __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6260   RecordPcInfo(instruction, instruction->GetDexPc());
6261 }
6262 
6263 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6264   NullCheckSlowPathARMVIXL* slow_path =
6265       new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6266   AddSlowPath(slow_path);
6267   __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6268 }
6269 
6270 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6271   codegen_->GenerateNullCheck(instruction);
6272 }
6273 
6274 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6275                                                     Location out_loc,
6276                                                     vixl32::Register base,
6277                                                     vixl32::Register reg_index,
6278                                                     vixl32::Condition cond) {
6279   uint32_t shift_count = DataType::SizeShift(type);
6280   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
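  // The element-size shift is folded into the address operand, so e.g. a kInt32 load
  // (shift count 2) is emitted as a single `LDR Rt, [base, index, LSL #2]`.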
6281 
6282   switch (type) {
6283     case DataType::Type::kBool:
6284     case DataType::Type::kUint8:
6285       __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6286       break;
6287     case DataType::Type::kInt8:
6288       __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6289       break;
6290     case DataType::Type::kUint16:
6291       __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6292       break;
6293     case DataType::Type::kInt16:
6294       __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6295       break;
6296     case DataType::Type::kReference:
6297     case DataType::Type::kInt32:
6298       __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6299       break;
6300     // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6301     case DataType::Type::kInt64:
6302     case DataType::Type::kFloat32:
6303     case DataType::Type::kFloat64:
6304     default:
6305       LOG(FATAL) << "Unreachable type " << type;
6306       UNREACHABLE();
6307   }
6308 }
6309 
6310 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6311                                                    Location loc,
6312                                                    vixl32::Register base,
6313                                                    vixl32::Register reg_index,
6314                                                    vixl32::Condition cond) {
6315   uint32_t shift_count = DataType::SizeShift(type);
6316   MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6317 
6318   switch (type) {
6319     case DataType::Type::kBool:
6320     case DataType::Type::kUint8:
6321     case DataType::Type::kInt8:
6322       __ Strb(cond, RegisterFrom(loc), mem_address);
6323       break;
6324     case DataType::Type::kUint16:
6325     case DataType::Type::kInt16:
6326       __ Strh(cond, RegisterFrom(loc), mem_address);
6327       break;
6328     case DataType::Type::kReference:
6329     case DataType::Type::kInt32:
6330       __ Str(cond, RegisterFrom(loc), mem_address);
6331       break;
6332     // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6333     case DataType::Type::kInt64:
6334     case DataType::Type::kFloat32:
6335     case DataType::Type::kFloat64:
6336     default:
6337       LOG(FATAL) << "Unreachable type " << type;
6338       UNREACHABLE();
6339   }
6340 }
6341 
6342 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6343   bool object_array_get_with_read_barrier =
6344       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6345   LocationSummary* locations =
6346       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6347                                                        object_array_get_with_read_barrier
6348                                                            ? LocationSummary::kCallOnSlowPath
6349                                                            : LocationSummary::kNoCall);
6350   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6351     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6352   }
6353   locations->SetInAt(0, Location::RequiresRegister());
6354   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6355   if (DataType::IsFloatingPointType(instruction->GetType())) {
6356     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6357   } else {
6358     // The output overlaps in the case of an object array get with
6359     // read barriers enabled: we do not want the move to overwrite the
6360     // array's location, as we need it to emit the read barrier.
6361     locations->SetOut(
6362         Location::RequiresRegister(),
6363         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6364   }
6365   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6366     if (instruction->GetIndex()->IsConstant()) {
6367       // Array loads with constant index are treated as field loads.
6368       // We need a temporary register for the read barrier load in
6369       // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6370       // only if the offset is too big.
6371       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6372       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6373       offset += index << DataType::SizeShift(DataType::Type::kReference);
6374       if (offset >= kReferenceLoadMinFarOffset) {
6375         locations->AddTemp(Location::RequiresRegister());
6376       }
6377     } else {
6378       // We need a non-scratch temporary for the array data pointer in
6379       // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6380       locations->AddTemp(Location::RequiresRegister());
6381     }
6382   } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6383     // Also need a temporary for the String compression feature.
6384     locations->AddTemp(Location::RequiresRegister());
6385   }
6386 }
6387 
6388 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6389   LocationSummary* locations = instruction->GetLocations();
6390   Location obj_loc = locations->InAt(0);
6391   vixl32::Register obj = InputRegisterAt(instruction, 0);
6392   Location index = locations->InAt(1);
6393   Location out_loc = locations->Out();
6394   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6395   DataType::Type type = instruction->GetType();
6396   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6397                                         instruction->IsStringCharAt();
6398   HInstruction* array_instr = instruction->GetArray();
6399   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6400 
6401   switch (type) {
6402     case DataType::Type::kBool:
6403     case DataType::Type::kUint8:
6404     case DataType::Type::kInt8:
6405     case DataType::Type::kUint16:
6406     case DataType::Type::kInt16:
6407     case DataType::Type::kInt32: {
6408       vixl32::Register length;
6409       if (maybe_compressed_char_at) {
6410         length = RegisterFrom(locations->GetTemp(0));
6411         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6412         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6413         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6414         GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6415         codegen_->MaybeRecordImplicitNullCheck(instruction);
6416       }
6417       if (index.IsConstant()) {
6418         int32_t const_index = Int32ConstantFrom(index);
6419         if (maybe_compressed_char_at) {
6420           vixl32::Label uncompressed_load, done;
6421           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6422           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6423           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6424                         "Expecting 0=compressed, 1=uncompressed");
6425           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
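          // The LSRS above moved the compression flag (bit 0 of the count field) into the
          // carry flag: carry set means uncompressed 16-bit chars, carry clear means
          // compressed 8-bit chars, hence the CS branch to the uncompressed path.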
6426           GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6427                                          RegisterFrom(out_loc),
6428                                          obj,
6429                                          data_offset + const_index);
6430           __ B(final_label);
6431           __ Bind(&uncompressed_load);
6432           GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6433                                          RegisterFrom(out_loc),
6434                                          obj,
6435                                          data_offset + (const_index << 1));
6436           if (done.IsReferenced()) {
6437             __ Bind(&done);
6438           }
6439         } else {
6440           uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6441 
6442           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6443           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6444           LoadOperandType load_type = GetLoadOperandType(type);
6445           GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6446           codegen_->MaybeRecordImplicitNullCheck(instruction);
6447         }
6448       } else {
6449         UseScratchRegisterScope temps(GetVIXLAssembler());
6450         vixl32::Register temp = temps.Acquire();
6451 
6452         if (has_intermediate_address) {
6453           // We do not need to compute the intermediate address from the array: the
6454           // input instruction has done it already. See the comment in
6455           // `TryExtractArrayAccessAddress()`.
6456           if (kIsDebugBuild) {
6457             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6458             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6459           }
6460           temp = obj;
6461         } else {
6462           __ Add(temp, obj, data_offset);
6463         }
6464         if (maybe_compressed_char_at) {
6465           vixl32::Label uncompressed_load, done;
6466           vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6467           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
6468           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6469                         "Expecting 0=compressed, 1=uncompressed");
6470           __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6471           __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6472           __ B(final_label);
6473           __ Bind(&uncompressed_load);
6474           __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6475           if (done.IsReferenced()) {
6476             __ Bind(&done);
6477           }
6478         } else {
6479           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6480           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6481           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6482           codegen_->MaybeRecordImplicitNullCheck(instruction);
6483         }
6484       }
6485       break;
6486     }
6487 
6488     case DataType::Type::kReference: {
6489       // The read barrier instrumentation of object ArrayGet
6490       // instructions does not support the HIntermediateAddress
6491       // instruction.
6492       DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
6493 
6494       static_assert(
6495           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6496           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6497       // /* HeapReference<Object> */ out =
6498       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6499       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6500         // Note that a potential implicit null check is handled in this
6501         // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6502         DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6503         if (index.IsConstant()) {
6504           // Array load with a constant index can be treated as a field load.
6505           Location maybe_temp =
6506               (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6507           data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6508           codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6509                                                           out_loc,
6510                                                           obj,
6511                                                           data_offset,
6512                                                           maybe_temp,
6513                                                           /* needs_null_check= */ false);
6514         } else {
6515           Location temp = locations->GetTemp(0);
6516           codegen_->GenerateArrayLoadWithBakerReadBarrier(
6517               out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6518         }
6519       } else {
6520         vixl32::Register out = OutputRegister(instruction);
6521         if (index.IsConstant()) {
6522           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6523           {
6524             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6525             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6526             GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6527             codegen_->MaybeRecordImplicitNullCheck(instruction);
6528           }
6529           // If read barriers are enabled, emit read barriers other than
6530           // Baker's using a slow path (and also unpoison the loaded
6531           // reference, if heap poisoning is enabled).
6532           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6533         } else {
6534           UseScratchRegisterScope temps(GetVIXLAssembler());
6535           vixl32::Register temp = temps.Acquire();
6536 
6537           if (has_intermediate_address) {
6538             // We do not need to compute the intermediate address from the array: the
6539             // input instruction has done it already. See the comment in
6540             // `TryExtractArrayAccessAddress()`.
6541             if (kIsDebugBuild) {
6542               HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6543               DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6544             }
6545             temp = obj;
6546           } else {
6547             __ Add(temp, obj, data_offset);
6548           }
6549           {
6550             // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6551             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6552             codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6553             temps.Close();
6554             codegen_->MaybeRecordImplicitNullCheck(instruction);
6555           }
6556           // If read barriers are enabled, emit read barriers other than
6557           // Baker's using a slow path (and also unpoison the loaded
6558           // reference, if heap poisoning is enabled).
6559           codegen_->MaybeGenerateReadBarrierSlow(
6560               instruction, out_loc, out_loc, obj_loc, data_offset, index);
6561         }
6562       }
6563       break;
6564     }
6565 
6566     case DataType::Type::kInt64: {
6567       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6568       // As two macro instructions can be emitted the max size is doubled.
6569       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6570       if (index.IsConstant()) {
6571         size_t offset =
6572             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6573         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6574       } else {
6575         UseScratchRegisterScope temps(GetVIXLAssembler());
6576         vixl32::Register temp = temps.Acquire();
6577         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6578         GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6579       }
6580       codegen_->MaybeRecordImplicitNullCheck(instruction);
6581       break;
6582     }
6583 
6584     case DataType::Type::kFloat32: {
6585       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6586       // As two macro instructions can be emitted the max size is doubled.
6587       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6588       vixl32::SRegister out = SRegisterFrom(out_loc);
6589       if (index.IsConstant()) {
6590         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6591         GetAssembler()->LoadSFromOffset(out, obj, offset);
6592       } else {
6593         UseScratchRegisterScope temps(GetVIXLAssembler());
6594         vixl32::Register temp = temps.Acquire();
6595         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6596         GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6597       }
6598       codegen_->MaybeRecordImplicitNullCheck(instruction);
6599       break;
6600     }
6601 
6602     case DataType::Type::kFloat64: {
6603       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6604       // As two macro instructions can be emitted the max size is doubled.
6605       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6606       if (index.IsConstant()) {
6607         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6608         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6609       } else {
6610         UseScratchRegisterScope temps(GetVIXLAssembler());
6611         vixl32::Register temp = temps.Acquire();
6612         __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6613         GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6614       }
6615       codegen_->MaybeRecordImplicitNullCheck(instruction);
6616       break;
6617     }
6618 
6619     case DataType::Type::kUint32:
6620     case DataType::Type::kUint64:
6621     case DataType::Type::kVoid:
6622       LOG(FATAL) << "Unreachable type " << type;
6623       UNREACHABLE();
6624   }
6625 }
6626 
6627 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6628   DataType::Type value_type = instruction->GetComponentType();
6629 
6630   bool needs_write_barrier =
6631       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6632   bool needs_type_check = instruction->NeedsTypeCheck();
6633 
6634   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6635       instruction,
6636       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6637 
6638   locations->SetInAt(0, Location::RequiresRegister());
6639   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6640   if (DataType::IsFloatingPointType(value_type)) {
6641     locations->SetInAt(2, Location::RequiresFpuRegister());
6642   } else {
6643     locations->SetInAt(2, Location::RequiresRegister());
6644   }
6645   if (needs_write_barrier) {
6646     // Temporary registers for the write barrier.
6647     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
6648     locations->AddTemp(Location::RequiresRegister());
6649   }
6650 }
6651 
6652 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6653   LocationSummary* locations = instruction->GetLocations();
6654   vixl32::Register array = InputRegisterAt(instruction, 0);
6655   Location index = locations->InAt(1);
6656   DataType::Type value_type = instruction->GetComponentType();
6657   bool needs_type_check = instruction->NeedsTypeCheck();
6658   bool needs_write_barrier =
6659       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6660   uint32_t data_offset =
6661       mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6662   Location value_loc = locations->InAt(2);
6663   HInstruction* array_instr = instruction->GetArray();
6664   bool has_intermediate_address = array_instr->IsIntermediateAddress();
6665 
6666   switch (value_type) {
6667     case DataType::Type::kBool:
6668     case DataType::Type::kUint8:
6669     case DataType::Type::kInt8:
6670     case DataType::Type::kUint16:
6671     case DataType::Type::kInt16:
6672     case DataType::Type::kInt32: {
6673       if (index.IsConstant()) {
6674         int32_t const_index = Int32ConstantFrom(index);
6675         uint32_t full_offset =
6676             data_offset + (const_index << DataType::SizeShift(value_type));
6677         StoreOperandType store_type = GetStoreOperandType(value_type);
6678         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6679         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6680         GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6681         codegen_->MaybeRecordImplicitNullCheck(instruction);
6682       } else {
6683         UseScratchRegisterScope temps(GetVIXLAssembler());
6684         vixl32::Register temp = temps.Acquire();
6685 
6686         if (has_intermediate_address) {
6687           // We do not need to compute the intermediate address from the array: the
6688           // input instruction has done it already. See the comment in
6689           // `TryExtractArrayAccessAddress()`.
6690           if (kIsDebugBuild) {
6691             HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6692             DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6693           }
6694           temp = array;
6695         } else {
6696           __ Add(temp, array, data_offset);
6697         }
6698         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6699         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6700         codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6701         codegen_->MaybeRecordImplicitNullCheck(instruction);
6702       }
6703       break;
6704     }
6705 
6706     case DataType::Type::kReference: {
6707       vixl32::Register value = RegisterFrom(value_loc);
6708       // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6709       // See the comment in instruction_simplifier_shared.cc.
6710       DCHECK(!has_intermediate_address);
6711 
6712       if (instruction->InputAt(2)->IsNullConstant()) {
6713         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6714         // As two macro instructions can be emitted the max size is doubled.
6715         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6716         // Just setting null.
6717         if (index.IsConstant()) {
6718           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6719           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6720         } else {
6721           DCHECK(index.IsRegister()) << index;
6722           UseScratchRegisterScope temps(GetVIXLAssembler());
6723           vixl32::Register temp = temps.Acquire();
6724           __ Add(temp, array, data_offset);
6725           codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6726         }
6727         codegen_->MaybeRecordImplicitNullCheck(instruction);
6728         DCHECK(!needs_write_barrier);
6729         DCHECK(!needs_type_check);
6730         break;
6731       }
6732 
6733       DCHECK(needs_write_barrier);
6734       Location temp1_loc = locations->GetTemp(0);
6735       vixl32::Register temp1 = RegisterFrom(temp1_loc);
6736       Location temp2_loc = locations->GetTemp(1);
6737       vixl32::Register temp2 = RegisterFrom(temp2_loc);
6738 
6739       bool can_value_be_null = instruction->GetValueCanBeNull();
6740       vixl32::Label do_store;
6741       if (can_value_be_null) {
6742         __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
6743       }
6744 
6745       SlowPathCodeARMVIXL* slow_path = nullptr;
6746       if (needs_type_check) {
6747         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
6748         codegen_->AddSlowPath(slow_path);
6749 
6750         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6751         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6752         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6753 
6754         // Note that when read barriers are enabled, the type checks
6755         // are performed without read barriers.  This is fine, even in
6756         // the case where a class object is in the from-space after
6757         // the flip, as a comparison involving such a type would not
6758         // produce a false positive; it may of course produce a false
6759         // negative, in which case we would take the ArraySet slow
6760         // path.
6761 
6762         {
6763           // Ensure we record the pc position immediately after the `ldr` instruction.
6764           ExactAssemblyScope aas(GetVIXLAssembler(),
6765                                  vixl32::kMaxInstructionSizeInBytes,
6766                                  CodeBufferCheckScope::kMaximumSize);
6767           // /* HeapReference<Class> */ temp1 = array->klass_
6768           __ ldr(temp1, MemOperand(array, class_offset));
6769           codegen_->MaybeRecordImplicitNullCheck(instruction);
6770         }
6771         GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6772 
6773         // /* HeapReference<Class> */ temp1 = temp1->component_type_
6774         GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
6775         // /* HeapReference<Class> */ temp2 = value->klass_
6776         GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
6777         // If heap poisoning is enabled, no need to unpoison `temp1`
6778         // nor `temp2`, as we are comparing two poisoned references.
6779         __ Cmp(temp1, temp2);
6780 
6781         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6782           vixl32::Label do_put;
6783           __ B(eq, &do_put, /* is_far_target= */ false);
6784           // If heap poisoning is enabled, the `temp1` reference has
6785           // not been unpoisoned yet; unpoison it now.
6786           GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6787 
6788           // /* HeapReference<Class> */ temp1 = temp1->super_class_
6789           GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
6790           // If heap poisoning is enabled, no need to unpoison
6791           // `temp1`, as we are comparing against null below.
6792           __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
6793           __ Bind(&do_put);
6794         } else {
6795           __ B(ne, slow_path->GetEntryLabel());
6796         }
6797       }
6798 
6799       codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false);
6800 
6801       if (can_value_be_null) {
6802         DCHECK(do_store.IsReferenced());
6803         __ Bind(&do_store);
6804       }
6805 
6806       vixl32::Register source = value;
6807       if (kPoisonHeapReferences) {
6808         // Note that in the case where `value` is a null reference,
6809         // we do not enter this block, as a null reference does not
6810         // need poisoning.
6811         DCHECK_EQ(value_type, DataType::Type::kReference);
6812         __ Mov(temp1, value);
6813         GetAssembler()->PoisonHeapReference(temp1);
6814         source = temp1;
6815       }
6816 
6817       {
6818         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6819         // As two macro instructions can be emitted the max size is doubled.
6820         EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6821         if (index.IsConstant()) {
6822           size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6823           GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
6824         } else {
6825           DCHECK(index.IsRegister()) << index;
6826 
6827           UseScratchRegisterScope temps(GetVIXLAssembler());
6828           vixl32::Register temp = temps.Acquire();
6829           __ Add(temp, array, data_offset);
6830           codegen_->StoreToShiftedRegOffset(value_type,
6831                                             LocationFrom(source),
6832                                             temp,
6833                                             RegisterFrom(index));
6834         }
6835 
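        // When the value is known non-null and a type check was emitted, the class load in
        // the type-check sequence above has already recorded the implicit null check; it
        // only needs to be recorded here in the remaining cases.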
6836         if (can_value_be_null || !needs_type_check) {
6837           codegen_->MaybeRecordImplicitNullCheck(instruction);
6838         }
6839       }
6840 
6841       if (slow_path != nullptr) {
6842         __ Bind(slow_path->GetExitLabel());
6843       }
6844 
6845       break;
6846     }
6847 
6848     case DataType::Type::kInt64: {
6849       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6850       // As two macro instructions can be emitted the max size is doubled.
6851       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6852       Location value = locations->InAt(2);
6853       if (index.IsConstant()) {
6854         size_t offset =
6855             (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6856         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
6857       } else {
6858         UseScratchRegisterScope temps(GetVIXLAssembler());
6859         vixl32::Register temp = temps.Acquire();
6860         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6861         GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
6862       }
6863       codegen_->MaybeRecordImplicitNullCheck(instruction);
6864       break;
6865     }
6866 
6867     case DataType::Type::kFloat32: {
6868       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6869       // As two macro instructions can be emitted the max size is doubled.
6870       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6871       Location value = locations->InAt(2);
6872       DCHECK(value.IsFpuRegister());
6873       if (index.IsConstant()) {
6874         size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6875         GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
6876       } else {
6877         UseScratchRegisterScope temps(GetVIXLAssembler());
6878         vixl32::Register temp = temps.Acquire();
6879         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6880         GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
6881       }
6882       codegen_->MaybeRecordImplicitNullCheck(instruction);
6883       break;
6884     }
6885 
6886     case DataType::Type::kFloat64: {
6887       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6888       // As two macro instructions can be emitted the max size is doubled.
6889       EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6890       Location value = locations->InAt(2);
6891       DCHECK(value.IsFpuRegisterPair());
6892       if (index.IsConstant()) {
6893         size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6894         GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
6895       } else {
6896         UseScratchRegisterScope temps(GetVIXLAssembler());
6897         vixl32::Register temp = temps.Acquire();
6898         __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6899         GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
6900       }
6901       codegen_->MaybeRecordImplicitNullCheck(instruction);
6902       break;
6903     }
6904 
6905     case DataType::Type::kUint32:
6906     case DataType::Type::kUint64:
6907     case DataType::Type::kVoid:
6908       LOG(FATAL) << "Unreachable type " << value_type;
6909       UNREACHABLE();
6910   }
6911 }
6912 
6913 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
6914   LocationSummary* locations =
6915       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6916   locations->SetInAt(0, Location::RequiresRegister());
6917   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6918 }
6919 
6920 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
6921   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6922   vixl32::Register obj = InputRegisterAt(instruction, 0);
6923   vixl32::Register out = OutputRegister(instruction);
6924   {
6925     ExactAssemblyScope aas(GetVIXLAssembler(),
6926                            vixl32::kMaxInstructionSizeInBytes,
6927                            CodeBufferCheckScope::kMaximumSize);
6928     __ ldr(out, MemOperand(obj, offset));
6929     codegen_->MaybeRecordImplicitNullCheck(instruction);
6930   }
6931   // Mask out compression flag from String's array length.
6932   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6933     __ Lsr(out, out, 1u);
6934   }
6935 }
6936 
6937 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
6938   LocationSummary* locations =
6939       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6940 
6941   locations->SetInAt(0, Location::RequiresRegister());
6942   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
6943   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6944 }
6945 
6946 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
6947   vixl32::Register out = OutputRegister(instruction);
6948   vixl32::Register first = InputRegisterAt(instruction, 0);
6949   Location second = instruction->GetLocations()->InAt(1);
6950 
6951   if (second.IsRegister()) {
6952     __ Add(out, first, RegisterFrom(second));
6953   } else {
6954     __ Add(out, first, Int32ConstantFrom(second));
6955   }
6956 }
6957 
6958 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
6959     HIntermediateAddressIndex* instruction) {
6960   LOG(FATAL) << "Unreachable " << instruction->GetId();
6961 }
6962 
6963 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
6964     HIntermediateAddressIndex* instruction) {
6965   LOG(FATAL) << "Unreachable " << instruction->GetId();
6966 }
6967 
6968 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
6969   RegisterSet caller_saves = RegisterSet::Empty();
6970   InvokeRuntimeCallingConventionARMVIXL calling_convention;
6971   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
6972   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
6973   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6974 
6975   HInstruction* index = instruction->InputAt(0);
6976   HInstruction* length = instruction->InputAt(1);
6977   // If both index and length are constants, we can check the bounds statically. However, if
6978   // at least one of them is not encodable, ArmEncodableConstantOrRegister would create a
6979   // Location::RequiresRegister(), which we do not want here. Instead, we create constant
6980   // locations for both inputs.
6981   bool both_const = index->IsConstant() && length->IsConstant();
6982   locations->SetInAt(0, both_const
6983       ? Location::ConstantLocation(index->AsConstant())
6984       : ArmEncodableConstantOrRegister(index, CMP));
6985   locations->SetInAt(1, both_const
6986       ? Location::ConstantLocation(length->AsConstant())
6987       : ArmEncodableConstantOrRegister(length, CMP));
6988 }
6989 
6990 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
6991   LocationSummary* locations = instruction->GetLocations();
6992   Location index_loc = locations->InAt(0);
6993   Location length_loc = locations->InAt(1);
6994 
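  // Note that the register comparisons below are unsigned (HS/LS): a negative index wraps
  // to a large unsigned value, so a single branch catches both `index < 0` and
  // `index >= length`.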
6995   if (length_loc.IsConstant()) {
6996     int32_t length = Int32ConstantFrom(length_loc);
6997     if (index_loc.IsConstant()) {
6998       // BCE will remove the bounds check if we are guaranteed to pass.
6999       int32_t index = Int32ConstantFrom(index_loc);
7000       if (index < 0 || index >= length) {
7001         SlowPathCodeARMVIXL* slow_path =
7002             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7003         codegen_->AddSlowPath(slow_path);
7004         __ B(slow_path->GetEntryLabel());
7005       } else {
7006         // Some optimization after BCE may have generated this, and we should not
7007         // generate a bounds check if it is a valid range.
7008       }
7009       return;
7010     }
7011 
7012     SlowPathCodeARMVIXL* slow_path =
7013         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7014     __ Cmp(RegisterFrom(index_loc), length);
7015     codegen_->AddSlowPath(slow_path);
7016     __ B(hs, slow_path->GetEntryLabel());
7017   } else {
7018     SlowPathCodeARMVIXL* slow_path =
7019         new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
7020     __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
7021     codegen_->AddSlowPath(slow_path);
7022     __ B(ls, slow_path->GetEntryLabel());
7023   }
7024 }
7025 
7026 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
7027                                       vixl32::Register card,
7028                                       vixl32::Register object,
7029                                       vixl32::Register value,
7030                                       bool value_can_be_null) {
7031   vixl32::Label is_null;
7032   if (value_can_be_null) {
7033     __ CompareAndBranchIfZero(value, &is_null, /* is_far_target= */ false);
7034   }
7035   // Load the address of the card table into `card`.
7036   GetAssembler()->LoadFromOffset(
7037       kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
7038   // Calculate the offset (in the card table) of the card corresponding to
7039   // `object`.
7040   __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
7041   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
7042   // `object`'s card.
7043   //
7044   // Register `card` contains the address of the card table. Note that the card
7045   // table's base is biased during its creation so that it always starts at an
7046   // address whose least-significant byte is equal to `kCardDirty` (see
7047   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
7048   // below writes the `kCardDirty` (byte) value into the `object`'s card
7049   // (located at `card + object >> kCardShift`).
7050   //
7051   // This dual use of the value in register `card` (1. to calculate the location
7052   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
7053   // (no need to explicitly load `kCardDirty` as an immediate value).
7054   __ Strb(card, MemOperand(card, temp));
7055   if (value_can_be_null) {
7056     __ Bind(&is_null);
7057   }
7058 }
7059 
7060 void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
7061   LOG(FATAL) << "Unreachable";
7062 }
7063 
7064 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
7065   if (instruction->GetNext()->IsSuspendCheck() &&
7066       instruction->GetBlock()->GetLoopInformation() != nullptr) {
7067     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
7068     // The back edge will generate the suspend check.
7069     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
7070   }
7071 
7072   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
7073 }
7074 
7075 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7076   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7077       instruction, LocationSummary::kCallOnSlowPath);
7078   locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7079 }
7080 
7081 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
7082   HBasicBlock* block = instruction->GetBlock();
7083   if (block->GetLoopInformation() != nullptr) {
7084     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7085     // The back edge will generate the suspend check.
7086     return;
7087   }
7088   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7089     // The goto will generate the suspend check.
7090     return;
7091   }
7092   GenerateSuspendCheck(instruction, nullptr);
7093   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7094 }
7095 
7096 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7097                                                            HBasicBlock* successor) {
7098   SuspendCheckSlowPathARMVIXL* slow_path =
7099       down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7100   if (slow_path == nullptr) {
7101     slow_path =
7102         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7103     instruction->SetSlowPath(slow_path);
7104     codegen_->AddSlowPath(slow_path);
7105     if (successor != nullptr) {
7106       DCHECK(successor->IsLoopHeader());
7107     }
7108   } else {
7109     DCHECK_EQ(slow_path->GetSuccessor(), successor);
7110   }
7111 
7112   UseScratchRegisterScope temps(GetVIXLAssembler());
7113   vixl32::Register temp = temps.Acquire();
7114   GetAssembler()->LoadFromOffset(
7115       kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
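  // The thread flags are zero when no suspend or checkpoint request is pending, so a
  // single compare-and-branch against zero decides whether the slow path is needed.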
7116   if (successor == nullptr) {
7117     __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
7118     __ Bind(slow_path->GetReturnLabel());
7119   } else {
7120     __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor));
7121     __ B(slow_path->GetEntryLabel());
7122   }
7123 }
7124 
7125 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7126   return codegen_->GetAssembler();
7127 }
7128 
7129 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7130   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7131   MoveOperands* move = moves_[index];
7132   Location source = move->GetSource();
7133   Location destination = move->GetDestination();
7134 
7135   if (source.IsRegister()) {
7136     if (destination.IsRegister()) {
7137       __ Mov(RegisterFrom(destination), RegisterFrom(source));
7138     } else if (destination.IsFpuRegister()) {
7139       __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7140     } else {
7141       DCHECK(destination.IsStackSlot());
7142       GetAssembler()->StoreToOffset(kStoreWord,
7143                                     RegisterFrom(source),
7144                                     sp,
7145                                     destination.GetStackIndex());
7146     }
7147   } else if (source.IsStackSlot()) {
7148     if (destination.IsRegister()) {
7149       GetAssembler()->LoadFromOffset(kLoadWord,
7150                                      RegisterFrom(destination),
7151                                      sp,
7152                                      source.GetStackIndex());
7153     } else if (destination.IsFpuRegister()) {
7154       GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7155     } else {
7156       DCHECK(destination.IsStackSlot());
7157       vixl32::Register temp = temps.Acquire();
7158       GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7159       GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7160     }
7161   } else if (source.IsFpuRegister()) {
7162     if (destination.IsRegister()) {
7163       __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7164     } else if (destination.IsFpuRegister()) {
7165       __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7166     } else {
7167       DCHECK(destination.IsStackSlot());
7168       GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7169     }
7170   } else if (source.IsDoubleStackSlot()) {
7171     if (destination.IsDoubleStackSlot()) {
7172       vixl32::DRegister temp = temps.AcquireD();
7173       GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7174       GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7175     } else if (destination.IsRegisterPair()) {
7176       DCHECK(ExpectedPairLayout(destination));
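      // kLoadWordPair takes only the low register and also fills the next register, which
      // the DCHECK above guarantees is the destination's high register.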
7177       GetAssembler()->LoadFromOffset(
7178           kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7179     } else {
7180       DCHECK(destination.IsFpuRegisterPair()) << destination;
7181       GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7182     }
7183   } else if (source.IsRegisterPair()) {
7184     if (destination.IsRegisterPair()) {
7185       __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7186       __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7187     } else if (destination.IsFpuRegisterPair()) {
7188       __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7189     } else {
7190       DCHECK(destination.IsDoubleStackSlot()) << destination;
7191       DCHECK(ExpectedPairLayout(source));
7192       GetAssembler()->StoreToOffset(kStoreWordPair,
7193                                     LowRegisterFrom(source),
7194                                     sp,
7195                                     destination.GetStackIndex());
7196     }
7197   } else if (source.IsFpuRegisterPair()) {
7198     if (destination.IsRegisterPair()) {
7199       __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7200     } else if (destination.IsFpuRegisterPair()) {
7201       __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7202     } else {
7203       DCHECK(destination.IsDoubleStackSlot()) << destination;
7204       GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7205     }
7206   } else {
7207     DCHECK(source.IsConstant()) << source;
7208     HConstant* constant = source.GetConstant();
7209     if (constant->IsIntConstant() || constant->IsNullConstant()) {
7210       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7211       if (destination.IsRegister()) {
7212         __ Mov(RegisterFrom(destination), value);
7213       } else {
7214         DCHECK(destination.IsStackSlot());
7215         vixl32::Register temp = temps.Acquire();
7216         __ Mov(temp, value);
7217         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7218       }
7219     } else if (constant->IsLongConstant()) {
7220       int64_t value = Int64ConstantFrom(source);
7221       if (destination.IsRegisterPair()) {
7222         __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7223         __ Mov(HighRegisterFrom(destination), High32Bits(value));
7224       } else {
7225         DCHECK(destination.IsDoubleStackSlot()) << destination;
7226         vixl32::Register temp = temps.Acquire();
7227         __ Mov(temp, Low32Bits(value));
7228         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7229         __ Mov(temp, High32Bits(value));
7230         GetAssembler()->StoreToOffset(kStoreWord,
7231                                       temp,
7232                                       sp,
7233                                       destination.GetHighStackIndex(kArmWordSize));
7234       }
7235     } else if (constant->IsDoubleConstant()) {
7236       double value = constant->AsDoubleConstant()->GetValue();
7237       if (destination.IsFpuRegisterPair()) {
7238         __ Vmov(DRegisterFrom(destination), value);
7239       } else {
7240         DCHECK(destination.IsDoubleStackSlot()) << destination;
7241         uint64_t int_value = bit_cast<uint64_t, double>(value);
7242         vixl32::Register temp = temps.Acquire();
7243         __ Mov(temp, Low32Bits(int_value));
7244         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7245         __ Mov(temp, High32Bits(int_value));
7246         GetAssembler()->StoreToOffset(kStoreWord,
7247                                       temp,
7248                                       sp,
7249                                       destination.GetHighStackIndex(kArmWordSize));
7250       }
7251     } else {
7252       DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7253       float value = constant->AsFloatConstant()->GetValue();
7254       if (destination.IsFpuRegister()) {
7255         __ Vmov(SRegisterFrom(destination), value);
7256       } else {
7257         DCHECK(destination.IsStackSlot());
7258         vixl32::Register temp = temps.Acquire();
7259         __ Mov(temp, bit_cast<int32_t, float>(value));
7260         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7261       }
7262     }
7263   }
7264 }
7265 
7266 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7267   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7268   vixl32::Register temp = temps.Acquire();
7269   __ Mov(temp, reg);
7270   GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7271   GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7272 }
7273 
7274 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7275   // TODO(VIXL32): Double check the performance of this implementation.
7276   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7277   vixl32::Register temp1 = temps.Acquire();
7278   ScratchRegisterScope ensure_scratch(
7279       this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7280   vixl32::Register temp2(ensure_scratch.GetRegister());
7281 
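  // Note: If the scratch register had to be spilled, SpillScratch() pushed it and SP moved
  // down by one word, so the SP-relative offsets below are adjusted by kArmWordSize.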
7282   int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7283   GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7284   GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7285   GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7286   GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7287 }
7288 
7289 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7290   MoveOperands* move = moves_[index];
7291   Location source = move->GetSource();
7292   Location destination = move->GetDestination();
7293   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7294 
7295   if (source.IsRegister() && destination.IsRegister()) {
7296     vixl32::Register temp = temps.Acquire();
7297     DCHECK(!RegisterFrom(source).Is(temp));
7298     DCHECK(!RegisterFrom(destination).Is(temp));
7299     __ Mov(temp, RegisterFrom(destination));
7300     __ Mov(RegisterFrom(destination), RegisterFrom(source));
7301     __ Mov(RegisterFrom(source), temp);
7302   } else if (source.IsRegister() && destination.IsStackSlot()) {
7303     Exchange(RegisterFrom(source), destination.GetStackIndex());
7304   } else if (source.IsStackSlot() && destination.IsRegister()) {
7305     Exchange(RegisterFrom(destination), source.GetStackIndex());
7306   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7307     Exchange(source.GetStackIndex(), destination.GetStackIndex());
7308   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7309     vixl32::Register temp = temps.Acquire();
7310     __ Vmov(temp, SRegisterFrom(source));
7311     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7312     __ Vmov(SRegisterFrom(destination), temp);
7313   } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
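    // Swap a core register pair through a D register: a single VMOV transfers both halves,
    // so only one FP scratch register is needed instead of two core scratches.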
7314     vixl32::DRegister temp = temps.AcquireD();
7315     __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7316     __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7317     __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7318     __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7319   } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
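    // Swap a core register pair with a double stack slot: park the pair in a D register,
    // reload the pair from the slot with LDRD, then store the parked value back to the slot.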
7320     vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7321     int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7322     DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7323     vixl32::DRegister temp = temps.AcquireD();
7324     __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7325     GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7326     GetAssembler()->StoreDToOffset(temp, sp, mem);
7327   } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7328     vixl32::DRegister first = DRegisterFrom(source);
7329     vixl32::DRegister second = DRegisterFrom(destination);
7330     vixl32::DRegister temp = temps.AcquireD();
7331     __ Vmov(temp, first);
7332     __ Vmov(first, second);
7333     __ Vmov(second, temp);
7334   } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7335     vixl32::DRegister reg = source.IsFpuRegisterPair()
7336         ? DRegisterFrom(source)
7337         : DRegisterFrom(destination);
7338     int mem = source.IsFpuRegisterPair()
7339         ? destination.GetStackIndex()
7340         : source.GetStackIndex();
7341     vixl32::DRegister temp = temps.AcquireD();
7342     __ Vmov(temp, reg);
7343     GetAssembler()->LoadDFromOffset(reg, sp, mem);
7344     GetAssembler()->StoreDToOffset(temp, sp, mem);
7345   } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7346     vixl32::SRegister reg = source.IsFpuRegister()
7347         ? SRegisterFrom(source)
7348         : SRegisterFrom(destination);
7349     int mem = source.IsFpuRegister()
7350         ? destination.GetStackIndex()
7351         : source.GetStackIndex();
7352     vixl32::Register temp = temps.Acquire();
7353     __ Vmov(temp, reg);
7354     GetAssembler()->LoadSFromOffset(reg, sp, mem);
7355     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7356   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
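    // Swap two 64-bit stack slots entirely through VFP scratch registers; no core scratch
    // register is required.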
7357     vixl32::DRegister temp1 = temps.AcquireD();
7358     vixl32::DRegister temp2 = temps.AcquireD();
7359     __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7360     __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7361     __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7362     __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7363   } else {
7364     LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7365   }
7366 }
7367 
7368 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7369   __ Push(vixl32::Register(reg));
7370 }
7371 
7372 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7373   __ Pop(vixl32::Register(reg));
7374 }
7375 
7376 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7377     HLoadClass::LoadKind desired_class_load_kind) {
7378   switch (desired_class_load_kind) {
7379     case HLoadClass::LoadKind::kInvalid:
7380       LOG(FATAL) << "UNREACHABLE";
7381       UNREACHABLE();
7382     case HLoadClass::LoadKind::kReferrersClass:
7383       break;
7384     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7385     case HLoadClass::LoadKind::kBootImageRelRo:
7386     case HLoadClass::LoadKind::kBssEntry:
7387     case HLoadClass::LoadKind::kBssEntryPublic:
7388     case HLoadClass::LoadKind::kBssEntryPackage:
7389       DCHECK(!GetCompilerOptions().IsJitCompiler());
7390       break;
7391     case HLoadClass::LoadKind::kJitBootImageAddress:
7392     case HLoadClass::LoadKind::kJitTableAddress:
7393       DCHECK(GetCompilerOptions().IsJitCompiler());
7394       break;
7395     case HLoadClass::LoadKind::kRuntimeCall:
7396       break;
7397   }
7398   return desired_class_load_kind;
7399 }
7400 
7401 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7402   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7403   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7404     InvokeRuntimeCallingConventionARMVIXL calling_convention;
7405     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7406         cls,
7407         LocationFrom(calling_convention.GetRegisterAt(0)),
7408         LocationFrom(r0));
7409     DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7410     return;
7411   }
7412   DCHECK_EQ(cls->NeedsAccessCheck(),
7413             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7414                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7415 
7416   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
7417   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7418       ? LocationSummary::kCallOnSlowPath
7419       : LocationSummary::kNoCall;
7420   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7421   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7422     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7423   }
7424 
7425   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7426     locations->SetInAt(0, Location::RequiresRegister());
7427   }
7428   locations->SetOut(Location::RequiresRegister());
7429   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
7430     if (!kUseReadBarrier || kUseBakerReadBarrier) {
7431       // Rely on the type resolution or initialization and marking to save everything we need.
7432       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7433     } else {
7434       // For non-Baker read barrier we have a temp-clobbering call.
7435     }
7436   }
7437 }
7438 
7439 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7440 // move.
7441 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7442   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7443   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7444     codegen_->GenerateLoadClassRuntimeCall(cls);
7445     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7446     return;
7447   }
7448   DCHECK_EQ(cls->NeedsAccessCheck(),
7449             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7450                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7451 
7452   LocationSummary* locations = cls->GetLocations();
7453   Location out_loc = locations->Out();
7454   vixl32::Register out = OutputRegister(cls);
7455 
7456   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7457       ? kWithoutReadBarrier
7458       : kCompilerReadBarrierOption;
7459   bool generate_null_check = false;
7460   switch (load_kind) {
7461     case HLoadClass::LoadKind::kReferrersClass: {
7462       DCHECK(!cls->CanCallRuntime());
7463       DCHECK(!cls->MustGenerateClinitCheck());
7464       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7465       vixl32::Register current_method = InputRegisterAt(cls, 0);
7466       codegen_->GenerateGcRootFieldLoad(cls,
7467                                         out_loc,
7468                                         current_method,
7469                                         ArtMethod::DeclaringClassOffset().Int32Value(),
7470                                         read_barrier_option);
7471       break;
7472     }
7473     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7474       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7475              codegen_->GetCompilerOptions().IsBootImageExtension());
7476       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7477       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7478           codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7479       codegen_->EmitMovwMovtPlaceholder(labels, out);
7480       break;
7481     }
7482     case HLoadClass::LoadKind::kBootImageRelRo: {
7483       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7484       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7485           codegen_->NewBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
7486       codegen_->EmitMovwMovtPlaceholder(labels, out);
7487       __ Ldr(out, MemOperand(out, /* offset= */ 0));
7488       break;
7489     }
7490     case HLoadClass::LoadKind::kBssEntry:
7491     case HLoadClass::LoadKind::kBssEntryPublic:
7492     case HLoadClass::LoadKind::kBssEntryPackage: {
7493       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls);
7494       codegen_->EmitMovwMovtPlaceholder(labels, out);
7495       // All aligned loads are implicitly atomic consume operations on ARM.
7496       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
7497       generate_null_check = true;
7498       break;
7499     }
7500     case HLoadClass::LoadKind::kJitBootImageAddress: {
7501       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7502       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7503       DCHECK_NE(address, 0u);
7504       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7505       break;
7506     }
7507     case HLoadClass::LoadKind::kJitTableAddress: {
7508       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7509                                                        cls->GetTypeIndex(),
7510                                                        cls->GetClass()));
7511       // /* GcRoot<mirror::Class> */ out = *out
7512       codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
7513       break;
7514     }
7515     case HLoadClass::LoadKind::kRuntimeCall:
7516     case HLoadClass::LoadKind::kInvalid:
7517       LOG(FATAL) << "UNREACHABLE";
7518       UNREACHABLE();
7519   }
7520 
7521   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7522     DCHECK(cls->CanCallRuntime());
7523     LoadClassSlowPathARMVIXL* slow_path =
7524         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7525     codegen_->AddSlowPath(slow_path);
7526     if (generate_null_check) {
7527       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7528     }
7529     if (cls->MustGenerateClinitCheck()) {
7530       GenerateClassInitializationCheck(slow_path, out);
7531     } else {
7532       __ Bind(slow_path->GetExitLabel());
7533     }
7534     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7535   }
7536 }
7537 
7538 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7539   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7540   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7541   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7542 }
7543 
7544 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7545   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7546 }
7547 
7548 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7549   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7550   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7551   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7552 }
7553 
7554 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7555   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7556 }
7557 
7558 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7559   LocationSummary* locations =
7560       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7561   locations->SetInAt(0, Location::RequiresRegister());
7562   if (check->HasUses()) {
7563     locations->SetOut(Location::SameAsFirstInput());
7564   }
7565   // Rely on the type initialization to save everything we need.
7566   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7567 }
7568 
7569 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7570   // We assume the class is not null.
7571   LoadClassSlowPathARMVIXL* slow_path =
7572       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7573   codegen_->AddSlowPath(slow_path);
7574   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7575 }
7576 
7577 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7578     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7579   UseScratchRegisterScope temps(GetVIXLAssembler());
7580   vixl32::Register temp = temps.Acquire();
7581   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
7582   constexpr uint32_t shifted_visibly_initialized_value =
7583       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
7584 
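  // The class status is held in the most significant bits of the 32-bit status word, above
  // the SubtypeCheck bits. Since kVisiblyInitialized is the highest class status, a single
  // unsigned compare against its shifted value routes every lower status to the slow path.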
7585   const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
7586   GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
7587   __ Cmp(temp, shifted_visibly_initialized_value);
7588   __ B(lo, slow_path->GetEntryLabel());
7589   __ Bind(slow_path->GetExitLabel());
7590 }
7591 
7592 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7593     HTypeCheckInstruction* check,
7594     vixl32::Register temp,
7595     vixl32::FlagsUpdate flags_update) {
7596   uint32_t path_to_root = check->GetBitstringPathToRoot();
7597   uint32_t mask = check->GetBitstringMask();
7598   DCHECK(IsPowerOfTwo(mask + 1));
7599   size_t mask_bits = WhichPowerOf2(mask + 1);
7600 
7601   // Note that HInstanceOf checks for a zero value in `temp`, but HCheckCast needs
7602   // the Z flag for BNE. This is indicated by the `flags_update` parameter.
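  // Illustrative example: with mask == 0xffff (mask_bits == 16), the check below reduces to a
  // 16-bit load of the bitstring followed by a single CMP or SUB against `path_to_root`.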
7603   if (mask_bits == 16u) {
7604     // Load only the bitstring part of the status word.
7605     __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7606     // Check if the bitstring bits are equal to `path_to_root`.
7607     if (flags_update == SetFlags) {
7608       __ Cmp(temp, path_to_root);
7609     } else {
7610       __ Sub(temp, temp, path_to_root);
7611     }
7612   } else {
7613     // /* uint32_t */ temp = temp->status_
7614     __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7615     if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7616       // Compare the bitstring bits using SUB.
7617       __ Sub(temp, temp, path_to_root);
7618       // Shift out bits that do not contribute to the comparison.
7619       __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7620     } else if (IsUint<16>(path_to_root)) {
7621       if (temp.IsLow()) {
7622         // Note: Optimized for size but contains one more dependent instruction than necessary.
7623         //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7624         //       macro assembler would use the high reg IP for the constant by default.
7625         // Compare the bitstring bits using SUB.
7626         __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2
7627         __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3
7628         // Shift out bits that do not contribute to the comparison.
7629         __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7630       } else {
7631         // Extract the bitstring bits.
7632         __ Ubfx(temp, temp, 0, mask_bits);
7633         // Check if the bitstring bits are equal to `path_to_root`.
7634         if (flags_update == SetFlags) {
7635           __ Cmp(temp, path_to_root);
7636         } else {
7637           __ Sub(temp, temp, path_to_root);
7638         }
7639       }
7640     } else {
7641       // Shift out bits that do not contribute to the comparison.
7642       __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7643       // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7644       if (flags_update == SetFlags) {
7645         __ Cmp(temp, path_to_root << (32u - mask_bits));
7646       } else {
7647         __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7648       }
7649     }
7650   }
7651 }
7652 
7653 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7654     HLoadString::LoadKind desired_string_load_kind) {
7655   switch (desired_string_load_kind) {
7656     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7657     case HLoadString::LoadKind::kBootImageRelRo:
7658     case HLoadString::LoadKind::kBssEntry:
7659       DCHECK(!GetCompilerOptions().IsJitCompiler());
7660       break;
7661     case HLoadString::LoadKind::kJitBootImageAddress:
7662     case HLoadString::LoadKind::kJitTableAddress:
7663       DCHECK(GetCompilerOptions().IsJitCompiler());
7664       break;
7665     case HLoadString::LoadKind::kRuntimeCall:
7666       break;
7667   }
7668   return desired_string_load_kind;
7669 }
7670 
7671 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7672   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7673   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7674   HLoadString::LoadKind load_kind = load->GetLoadKind();
7675   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7676     locations->SetOut(LocationFrom(r0));
7677   } else {
7678     locations->SetOut(Location::RequiresRegister());
7679     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7680       if (!kUseReadBarrier || kUseBakerReadBarrier) {
7681         // Rely on the pResolveString and marking to save everything we need, including temps.
7682         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7683       } else {
7684         // For non-Baker read barrier we have a temp-clobbering call.
7685       }
7686     }
7687   }
7688 }
7689 
7690 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7691 // move.
7692 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7693   LocationSummary* locations = load->GetLocations();
7694   Location out_loc = locations->Out();
7695   vixl32::Register out = OutputRegister(load);
7696   HLoadString::LoadKind load_kind = load->GetLoadKind();
7697 
7698   switch (load_kind) {
7699     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7700       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7701              codegen_->GetCompilerOptions().IsBootImageExtension());
7702       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7703           codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7704       codegen_->EmitMovwMovtPlaceholder(labels, out);
7705       return;
7706     }
7707     case HLoadString::LoadKind::kBootImageRelRo: {
7708       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7709       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7710           codegen_->NewBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
7711       codegen_->EmitMovwMovtPlaceholder(labels, out);
7712       __ Ldr(out, MemOperand(out, /* offset= */ 0));
7713       return;
7714     }
7715     case HLoadString::LoadKind::kBssEntry: {
7716       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7717           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
7718       codegen_->EmitMovwMovtPlaceholder(labels, out);
7719       // All aligned loads are implicitly atomic consume operations on ARM.
7720       codegen_->GenerateGcRootFieldLoad(
7721           load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
7722       LoadStringSlowPathARMVIXL* slow_path =
7723           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
7724       codegen_->AddSlowPath(slow_path);
7725       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7726       __ Bind(slow_path->GetExitLabel());
7727       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
7728       return;
7729     }
7730     case HLoadString::LoadKind::kJitBootImageAddress: {
7731       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7732       DCHECK_NE(address, 0u);
7733       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7734       return;
7735     }
7736     case HLoadString::LoadKind::kJitTableAddress: {
7737       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
7738                                                         load->GetStringIndex(),
7739                                                         load->GetString()));
7740       // /* GcRoot<mirror::String> */ out = *out
7741       codegen_->GenerateGcRootFieldLoad(
7742           load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
7743       return;
7744     }
7745     default:
7746       break;
7747   }
7748 
7749   // TODO: Re-add the compiler code to do string dex cache lookup again.
7750   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
7751   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7752   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
7753   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7754   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7755   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
7756 }
7757 
7758 static int32_t GetExceptionTlsOffset() {
7759   return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
7760 }
7761 
7762 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
7763   LocationSummary* locations =
7764       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7765   locations->SetOut(Location::RequiresRegister());
7766 }
7767 
7768 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
7769   vixl32::Register out = OutputRegister(load);
7770   GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
7771 }
7772 
7773 
7774 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
7775   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7776 }
7777 
7778 void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7779   UseScratchRegisterScope temps(GetVIXLAssembler());
7780   vixl32::Register temp = temps.Acquire();
7781   __ Mov(temp, 0);
7782   GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
7783 }
7784 
7785 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
7786   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7787       instruction, LocationSummary::kCallOnMainOnly);
7788   InvokeRuntimeCallingConventionARMVIXL calling_convention;
7789   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
7790 }
7791 
7792 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
7793   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7794   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7795 }
7796 
7797 // Temp is used for read barrier.
7798 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7799   if (kEmitCompilerReadBarrier &&
7800        (kUseBakerReadBarrier ||
7801           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7802           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7803           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7804     return 1;
7805   }
7806   return 0;
7807 }
7808 
7809 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
7810 // interface pointer, one for loading the current interface.
7811 // The other checks have one temp for loading the object's class.
7812 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7813   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7814     return 3;
7815   }
7816   return 1 + NumberOfInstanceOfTemps(type_check_kind);
7817 }
7818 
7819 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
7820   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7821   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7822   bool baker_read_barrier_slow_path = false;
7823   switch (type_check_kind) {
7824     case TypeCheckKind::kExactCheck:
7825     case TypeCheckKind::kAbstractClassCheck:
7826     case TypeCheckKind::kClassHierarchyCheck:
7827     case TypeCheckKind::kArrayObjectCheck: {
7828       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7829       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7830       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7831       break;
7832     }
7833     case TypeCheckKind::kArrayCheck:
7834     case TypeCheckKind::kUnresolvedCheck:
7835     case TypeCheckKind::kInterfaceCheck:
7836       call_kind = LocationSummary::kCallOnSlowPath;
7837       break;
7838     case TypeCheckKind::kBitstringCheck:
7839       break;
7840   }
7841 
7842   LocationSummary* locations =
7843       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7844   if (baker_read_barrier_slow_path) {
7845     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7846   }
7847   locations->SetInAt(0, Location::RequiresRegister());
7848   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7849     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7850     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7851     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7852   } else {
7853     locations->SetInAt(1, Location::RequiresRegister());
7854   }
7855   // The "out" register is used as a temporary, so it overlaps with the inputs.
7856   // Note that TypeCheckSlowPathARM uses this register too.
7857   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
7858   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7859 }
7860 
7861 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
7862   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7863   LocationSummary* locations = instruction->GetLocations();
7864   Location obj_loc = locations->InAt(0);
7865   vixl32::Register obj = InputRegisterAt(instruction, 0);
7866   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
7867       ? vixl32::Register()
7868       : InputRegisterAt(instruction, 1);
7869   Location out_loc = locations->Out();
7870   vixl32::Register out = OutputRegister(instruction);
7871   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7872   DCHECK_LE(num_temps, 1u);
7873   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7874   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7875   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7876   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7877   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7878   vixl32::Label done;
7879   vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
7880   SlowPathCodeARMVIXL* slow_path = nullptr;
7881 
7882   // Return 0 if `obj` is null.
7883   // Avoid the null check if we know `obj` is not null.
7884   if (instruction->MustDoNullCheck()) {
7885     DCHECK(!out.Is(obj));
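    // Materialize the `false` result before the null test so that a null `obj` can branch
    // straight to `final_label` with the correct value already in `out`.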
7886     __ Mov(out, 0);
7887     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
7888   }
7889 
7890   switch (type_check_kind) {
7891     case TypeCheckKind::kExactCheck: {
7892       ReadBarrierOption read_barrier_option =
7893           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7894       // /* HeapReference<Class> */ out = obj->klass_
7895       GenerateReferenceLoadTwoRegisters(instruction,
7896                                         out_loc,
7897                                         obj_loc,
7898                                         class_offset,
7899                                         maybe_temp_loc,
7900                                         read_barrier_option);
7901       // Classes must be equal for the instanceof to succeed.
7902       __ Cmp(out, cls);
7903       // We speculatively set the result to false without changing the condition
7904       // flags, which allows us to avoid some branching later.
7905       __ Mov(LeaveFlags, out, 0);
7906 
7907       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
7908       // we check that the output is in a low register, so that a 16-bit MOV
7909       // encoding can be used.
7910       if (out.IsLow()) {
7911         // We use the scope because of the IT block that follows.
7912         ExactAssemblyScope guard(GetVIXLAssembler(),
7913                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
7914                                  CodeBufferCheckScope::kExactSize);
7915 
7916         __ it(eq);
7917         __ mov(eq, out, 1);
7918       } else {
7919         __ B(ne, final_label, /* is_far_target= */ false);
7920         __ Mov(out, 1);
7921       }
7922 
7923       break;
7924     }
7925 
7926     case TypeCheckKind::kAbstractClassCheck: {
7927       ReadBarrierOption read_barrier_option =
7928           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7929       // /* HeapReference<Class> */ out = obj->klass_
7930       GenerateReferenceLoadTwoRegisters(instruction,
7931                                         out_loc,
7932                                         obj_loc,
7933                                         class_offset,
7934                                         maybe_temp_loc,
7935                                         read_barrier_option);
7936       // If the class is abstract, we eagerly fetch the super class of the
7937       // object to avoid doing a comparison we know will fail.
7938       vixl32::Label loop;
7939       __ Bind(&loop);
7940       // /* HeapReference<Class> */ out = out->super_class_
7941       GenerateReferenceLoadOneRegister(instruction,
7942                                        out_loc,
7943                                        super_offset,
7944                                        maybe_temp_loc,
7945                                        read_barrier_option);
7946       // If `out` is null, we use it for the result, and jump to the final label.
7947       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
7948       __ Cmp(out, cls);
7949       __ B(ne, &loop, /* is_far_target= */ false);
7950       __ Mov(out, 1);
7951       break;
7952     }
7953 
7954     case TypeCheckKind::kClassHierarchyCheck: {
7955       ReadBarrierOption read_barrier_option =
7956           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7957       // /* HeapReference<Class> */ out = obj->klass_
7958       GenerateReferenceLoadTwoRegisters(instruction,
7959                                         out_loc,
7960                                         obj_loc,
7961                                         class_offset,
7962                                         maybe_temp_loc,
7963                                         read_barrier_option);
7964       // Walk over the class hierarchy to find a match.
7965       vixl32::Label loop, success;
7966       __ Bind(&loop);
7967       __ Cmp(out, cls);
7968       __ B(eq, &success, /* is_far_target= */ false);
7969       // /* HeapReference<Class> */ out = out->super_class_
7970       GenerateReferenceLoadOneRegister(instruction,
7971                                        out_loc,
7972                                        super_offset,
7973                                        maybe_temp_loc,
7974                                        read_barrier_option);
7975       // This is essentially a null check, but it sets the condition flags to the
7976       // proper value for the code that follows the loop, i.e. not `eq`.
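      // (CMP out, #1 is an unsigned comparison: `hs` holds only for a non-null super class,
      // while a null `out` falls through with the flags reading `ne`/`lo`.)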
7977       __ Cmp(out, 1);
7978       __ B(hs, &loop, /* is_far_target= */ false);
7979 
7980       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
7981       // we check that the output is in a low register, so that a 16-bit MOV
7982       // encoding can be used.
7983       if (out.IsLow()) {
7984         // If `out` is null, we use it for the result, and the condition flags
7985         // have already been set to `ne`, so the IT block that comes afterwards
7986         // (and which handles the successful case) turns into a NOP (instead of
7987         // overwriting `out`).
7988         __ Bind(&success);
7989 
7990         // We use the scope because of the IT block that follows.
7991         ExactAssemblyScope guard(GetVIXLAssembler(),
7992                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
7993                                  CodeBufferCheckScope::kExactSize);
7994 
7995         // There is only one branch to the `success` label (which is bound to this
7996         // IT block), and it has the same condition, `eq`, so in that case the MOV
7997         // is executed.
7998         __ it(eq);
7999         __ mov(eq, out, 1);
8000       } else {
8001         // If `out` is null, we use it for the result, and jump to the final label.
8002         __ B(final_label);
8003         __ Bind(&success);
8004         __ Mov(out, 1);
8005       }
8006 
8007       break;
8008     }
8009 
8010     case TypeCheckKind::kArrayObjectCheck: {
8011       ReadBarrierOption read_barrier_option =
8012           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
8013       // /* HeapReference<Class> */ out = obj->klass_
8014       GenerateReferenceLoadTwoRegisters(instruction,
8015                                         out_loc,
8016                                         obj_loc,
8017                                         class_offset,
8018                                         maybe_temp_loc,
8019                                         read_barrier_option);
8020       // Do an exact check.
8021       vixl32::Label exact_check;
8022       __ Cmp(out, cls);
8023       __ B(eq, &exact_check, /* is_far_target= */ false);
8024       // Otherwise, we need to check that the object's class is a non-primitive array.
8025       // /* HeapReference<Class> */ out = out->component_type_
8026       GenerateReferenceLoadOneRegister(instruction,
8027                                        out_loc,
8028                                        component_offset,
8029                                        maybe_temp_loc,
8030                                        read_barrier_option);
8031       // If `out` is null, we use it for the result, and jump to the final label.
8032       __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
8033       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
8034       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8035       __ Cmp(out, 0);
8036       // We speculatively set the result to false without changing the condition
8037       // flags, which allows us to avoid some branching later.
8038       __ Mov(LeaveFlags, out, 0);
8039 
8040       // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
8041       // we check that the output is in a low register, so that a 16-bit MOV
8042       // encoding can be used.
8043       if (out.IsLow()) {
8044         __ Bind(&exact_check);
8045 
8046         // We use the scope because of the IT block that follows.
8047         ExactAssemblyScope guard(GetVIXLAssembler(),
8048                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
8049                                  CodeBufferCheckScope::kExactSize);
8050 
8051         __ it(eq);
8052         __ mov(eq, out, 1);
8053       } else {
8054         __ B(ne, final_label, /* is_far_target= */ false);
8055         __ Bind(&exact_check);
8056         __ Mov(out, 1);
8057       }
8058 
8059       break;
8060     }
8061 
8062     case TypeCheckKind::kArrayCheck: {
8063       // No read barrier since the slow path will retry upon failure.
8064       // /* HeapReference<Class> */ out = obj->klass_
8065       GenerateReferenceLoadTwoRegisters(instruction,
8066                                         out_loc,
8067                                         obj_loc,
8068                                         class_offset,
8069                                         maybe_temp_loc,
8070                                         kWithoutReadBarrier);
8071       __ Cmp(out, cls);
8072       DCHECK(locations->OnlyCallsOnSlowPath());
8073       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8074           instruction, /* is_fatal= */ false);
8075       codegen_->AddSlowPath(slow_path);
8076       __ B(ne, slow_path->GetEntryLabel());
8077       __ Mov(out, 1);
8078       break;
8079     }
8080 
8081     case TypeCheckKind::kUnresolvedCheck:
8082     case TypeCheckKind::kInterfaceCheck: {
8083       // Note that we indeed only call on slow path, but we always go
8084       // into the slow path for the unresolved and interface check
8085       // cases.
8086       //
8087       // We cannot directly call the InstanceofNonTrivial runtime
8088       // entry point without resorting to a type checking slow path
8089       // here (i.e. by calling InvokeRuntime directly), as it would
8090       // require to assign fixed registers for the inputs of this
8091       // HInstanceOf instruction (following the runtime calling
8092       // convention), which might be cluttered by the potential first
8093       // read barrier emission at the beginning of this method.
8094       //
8095       // TODO: Introduce a new runtime entry point taking the object
8096       // to test (instead of its class) as argument, and let it deal
8097       // with the read barrier issues. This will let us refactor this
8098       // case of the `switch` code as it was previously (with a direct
8099       // call to the runtime not using a type checking slow path).
8100       // This should also be beneficial for the other cases above.
8101       DCHECK(locations->OnlyCallsOnSlowPath());
8102       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8103           instruction, /* is_fatal= */ false);
8104       codegen_->AddSlowPath(slow_path);
8105       __ B(slow_path->GetEntryLabel());
8106       break;
8107     }
8108 
8109     case TypeCheckKind::kBitstringCheck: {
8110       // /* HeapReference<Class> */ temp = obj->klass_
8111       GenerateReferenceLoadTwoRegisters(instruction,
8112                                         out_loc,
8113                                         obj_loc,
8114                                         class_offset,
8115                                         maybe_temp_loc,
8116                                         kWithoutReadBarrier);
8117 
8118       GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8119       // If `out` is a low reg and we would have another low reg temp, we could
8120       // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8121       //
8122       // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8123       // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8124       // would be the same and we would have fewer direct data dependencies.
8125       codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR
8126       break;
8127     }
8128   }
8129 
8130   if (done.IsReferenced()) {
8131     __ Bind(&done);
8132   }
8133 
8134   if (slow_path != nullptr) {
8135     __ Bind(slow_path->GetExitLabel());
8136   }
8137 }
8138 
8139 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8140   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8141   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
8142   LocationSummary* locations =
8143       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8144   locations->SetInAt(0, Location::RequiresRegister());
8145   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8146     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
8147     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
8148     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
8149   } else {
8150     locations->SetInAt(1, Location::RequiresRegister());
8151   }
8152   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
8153 }
8154 
8155 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8156   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8157   LocationSummary* locations = instruction->GetLocations();
8158   Location obj_loc = locations->InAt(0);
8159   vixl32::Register obj = InputRegisterAt(instruction, 0);
8160   vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8161       ? vixl32::Register()
8162       : InputRegisterAt(instruction, 1);
8163   Location temp_loc = locations->GetTemp(0);
8164   vixl32::Register temp = RegisterFrom(temp_loc);
8165   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
8166   DCHECK_LE(num_temps, 3u);
8167   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8168   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8169   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8170   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8171   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8172   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8173   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8174   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8175   const uint32_t object_array_data_offset =
8176       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8177 
8178   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
8179   SlowPathCodeARMVIXL* type_check_slow_path =
8180       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8181           instruction, is_type_check_slow_path_fatal);
8182   codegen_->AddSlowPath(type_check_slow_path);
8183 
8184   vixl32::Label done;
8185   vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8186   // Avoid null check if we know obj is not null.
8187   if (instruction->MustDoNullCheck()) {
8188     __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8189   }
8190 
8191   switch (type_check_kind) {
8192     case TypeCheckKind::kExactCheck:
8193     case TypeCheckKind::kArrayCheck: {
8194       // /* HeapReference<Class> */ temp = obj->klass_
8195       GenerateReferenceLoadTwoRegisters(instruction,
8196                                         temp_loc,
8197                                         obj_loc,
8198                                         class_offset,
8199                                         maybe_temp2_loc,
8200                                         kWithoutReadBarrier);
8201 
8202       __ Cmp(temp, cls);
8203       // Jump to slow path for throwing the exception or doing a
8204       // more involved array check.
8205       __ B(ne, type_check_slow_path->GetEntryLabel());
8206       break;
8207     }
8208 
8209     case TypeCheckKind::kAbstractClassCheck: {
8210       // /* HeapReference<Class> */ temp = obj->klass_
8211       GenerateReferenceLoadTwoRegisters(instruction,
8212                                         temp_loc,
8213                                         obj_loc,
8214                                         class_offset,
8215                                         maybe_temp2_loc,
8216                                         kWithoutReadBarrier);
8217 
8218       // If the class is abstract, we eagerly fetch the super class of the
8219       // object to avoid doing a comparison we know will fail.
8220       vixl32::Label loop;
8221       __ Bind(&loop);
8222       // /* HeapReference<Class> */ temp = temp->super_class_
8223       GenerateReferenceLoadOneRegister(instruction,
8224                                        temp_loc,
8225                                        super_offset,
8226                                        maybe_temp2_loc,
8227                                        kWithoutReadBarrier);
8228 
8229       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8230       // exception.
8231       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8232 
8233       // Otherwise, compare the classes.
8234       __ Cmp(temp, cls);
8235       __ B(ne, &loop, /* is_far_target= */ false);
8236       break;
8237     }
8238 
8239     case TypeCheckKind::kClassHierarchyCheck: {
8240       // /* HeapReference<Class> */ temp = obj->klass_
8241       GenerateReferenceLoadTwoRegisters(instruction,
8242                                         temp_loc,
8243                                         obj_loc,
8244                                         class_offset,
8245                                         maybe_temp2_loc,
8246                                         kWithoutReadBarrier);
8247 
8248       // Walk over the class hierarchy to find a match.
8249       vixl32::Label loop;
8250       __ Bind(&loop);
8251       __ Cmp(temp, cls);
8252       __ B(eq, final_label, /* is_far_target= */ false);
8253 
8254       // /* HeapReference<Class> */ temp = temp->super_class_
8255       GenerateReferenceLoadOneRegister(instruction,
8256                                        temp_loc,
8257                                        super_offset,
8258                                        maybe_temp2_loc,
8259                                        kWithoutReadBarrier);
8260 
8261       // If the class reference currently in `temp` is null, jump to the slow path to throw the
8262       // exception.
8263       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8264       // Otherwise, jump to the beginning of the loop.
8265       __ B(&loop);
8266       break;
8267     }
8268 
8269     case TypeCheckKind::kArrayObjectCheck:  {
8270       // /* HeapReference<Class> */ temp = obj->klass_
8271       GenerateReferenceLoadTwoRegisters(instruction,
8272                                         temp_loc,
8273                                         obj_loc,
8274                                         class_offset,
8275                                         maybe_temp2_loc,
8276                                         kWithoutReadBarrier);
8277 
8278       // Do an exact check.
8279       __ Cmp(temp, cls);
8280       __ B(eq, final_label, /* is_far_target= */ false);
8281 
8282       // Otherwise, we need to check that the object's class is a non-primitive array.
8283       // /* HeapReference<Class> */ temp = temp->component_type_
8284       GenerateReferenceLoadOneRegister(instruction,
8285                                        temp_loc,
8286                                        component_offset,
8287                                        maybe_temp2_loc,
8288                                        kWithoutReadBarrier);
8289       // If the component type is null, jump to the slow path to throw the exception.
8290       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8291       // Otherwise, the object is indeed an array; further check that this component type
8292       // is not a primitive type.
8293       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8294       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8295       __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8296       break;
8297     }
8298 
8299     case TypeCheckKind::kUnresolvedCheck:
8300       // We always go into the type check slow path for the unresolved check case.
8301       // We cannot directly call the CheckCast runtime entry point
8302       // without resorting to a type checking slow path here (i.e. by
8303       // calling InvokeRuntime directly), as it would require to
8304       // assign fixed registers for the inputs of this HInstanceOf
8305       // instruction (following the runtime calling convention), which
8306       // might be cluttered by the potential first read barrier
8307       // emission at the beginning of this method.
8308 
8309       __ B(type_check_slow_path->GetEntryLabel());
8310       break;
8311 
8312     case TypeCheckKind::kInterfaceCheck: {
8313       // Avoid read barriers to improve performance of the fast path. We cannot get false
8314       // positives by doing this.
8315       // /* HeapReference<Class> */ temp = obj->klass_
8316       GenerateReferenceLoadTwoRegisters(instruction,
8317                                         temp_loc,
8318                                         obj_loc,
8319                                         class_offset,
8320                                         maybe_temp2_loc,
8321                                         kWithoutReadBarrier);
8322 
8323       // /* HeapReference<Class> */ temp = temp->iftable_
8324       GenerateReferenceLoadTwoRegisters(instruction,
8325                                         temp_loc,
8326                                         temp_loc,
8327                                         iftable_offset,
8328                                         maybe_temp2_loc,
8329                                         kWithoutReadBarrier);
8330       // Iftable is never null.
8331       __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8332       // Loop through the iftable and check if any class matches.
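      // The if-table stores (interface class, method array) pairs, which is why the code below
      // advances `temp` by two references and decrements the remaining count by 2.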
8333       vixl32::Label start_loop;
8334       __ Bind(&start_loop);
8335       __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8336                                 type_check_slow_path->GetEntryLabel());
8337       __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8338       GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8339       // Go to next interface.
8340       __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8341       __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8342       // Compare the classes and continue the loop if they do not match.
8343       __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8344       __ B(ne, &start_loop, /* is_far_target= */ false);
8345       break;
8346     }
8347 
8348     case TypeCheckKind::kBitstringCheck: {
8349       // /* HeapReference<Class> */ temp = obj->klass_
8350       GenerateReferenceLoadTwoRegisters(instruction,
8351                                         temp_loc,
8352                                         obj_loc,
8353                                         class_offset,
8354                                         maybe_temp2_loc,
8355                                         kWithoutReadBarrier);
8356 
8357       GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8358       __ B(ne, type_check_slow_path->GetEntryLabel());
8359       break;
8360     }
8361   }
8362   if (done.IsReferenced()) {
8363     __ Bind(&done);
8364   }
8365 
8366   __ Bind(type_check_slow_path->GetExitLabel());
8367 }
8368 
8369 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8370   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8371       instruction, LocationSummary::kCallOnMainOnly);
8372   InvokeRuntimeCallingConventionARMVIXL calling_convention;
8373   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8374 }
8375 
8376 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8377   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8378                           instruction,
8379                           instruction->GetDexPc());
8380   if (instruction->IsEnter()) {
8381     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8382   } else {
8383     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8384   }
8385   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8386 }
8387 
8388 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8389   HandleBitwiseOperation(instruction, AND);
8390 }
8391 
8392 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8393   HandleBitwiseOperation(instruction, ORR);
8394 }
8395 
8396 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8397   HandleBitwiseOperation(instruction, EOR);
8398 }
8399 
8400 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8401   LocationSummary* locations =
8402       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8403   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8404          || instruction->GetResultType() == DataType::Type::kInt64);
8405   // Note: GVN reorders commutative operations to have the constant on the right hand side.
8406   locations->SetInAt(0, Location::RequiresRegister());
8407   locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8408   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8409 }
8410 
8411 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8412   HandleBitwiseOperation(instruction);
8413 }
8414 
8415 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8416   HandleBitwiseOperation(instruction);
8417 }
8418 
8419 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8420   HandleBitwiseOperation(instruction);
8421 }
8422 
8423 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8424   LocationSummary* locations =
8425       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8426   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8427          || instruction->GetResultType() == DataType::Type::kInt64);
8428 
8429   locations->SetInAt(0, Location::RequiresRegister());
8430   locations->SetInAt(1, Location::RequiresRegister());
8431   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8432 }
8433 
8434 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8435   LocationSummary* locations = instruction->GetLocations();
8436   Location first = locations->InAt(0);
8437   Location second = locations->InAt(1);
8438   Location out = locations->Out();
8439 
8440   if (instruction->GetResultType() == DataType::Type::kInt32) {
8441     vixl32::Register first_reg = RegisterFrom(first);
8442     vixl32::Register second_reg = RegisterFrom(second);
8443     vixl32::Register out_reg = RegisterFrom(out);
8444 
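    // BIC computes `first & ~second` and ORN computes `first | ~second`, which matches the
    // semantics of HBitwiseNegatedRight for AND and OR.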
8445     switch (instruction->GetOpKind()) {
8446       case HInstruction::kAnd:
8447         __ Bic(out_reg, first_reg, second_reg);
8448         break;
8449       case HInstruction::kOr:
8450         __ Orn(out_reg, first_reg, second_reg);
8451         break;
8452       // There is no EON on arm.
8453       case HInstruction::kXor:
8454       default:
8455         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8456         UNREACHABLE();
8457     }
8458     return;
8459 
8460   } else {
8461     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8462     vixl32::Register first_low = LowRegisterFrom(first);
8463     vixl32::Register first_high = HighRegisterFrom(first);
8464     vixl32::Register second_low = LowRegisterFrom(second);
8465     vixl32::Register second_high = HighRegisterFrom(second);
8466     vixl32::Register out_low = LowRegisterFrom(out);
8467     vixl32::Register out_high = HighRegisterFrom(out);
8468 
8469     switch (instruction->GetOpKind()) {
8470       case HInstruction::kAnd:
8471         __ Bic(out_low, first_low, second_low);
8472         __ Bic(out_high, first_high, second_high);
8473         break;
8474       case HInstruction::kOr:
8475         __ Orn(out_low, first_low, second_low);
8476         __ Orn(out_high, first_high, second_high);
8477         break;
8478       // There is no EON on arm.
8479       case HInstruction::kXor:
8480       default:
8481         LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8482         UNREACHABLE();
8483     }
8484   }
8485 }
8486 
8487 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8488     HDataProcWithShifterOp* instruction) {
8489   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8490          instruction->GetType() == DataType::Type::kInt64);
8491   LocationSummary* locations =
8492       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8493   const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8494                        HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8495 
8496   locations->SetInAt(0, Location::RequiresRegister());
8497   locations->SetInAt(1, Location::RequiresRegister());
8498   locations->SetOut(Location::RequiresRegister(),
8499                     overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8500 }
8501 
8502 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8503     HDataProcWithShifterOp* instruction) {
8504   const LocationSummary* const locations = instruction->GetLocations();
8505   const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8506   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8507 
8508   if (instruction->GetType() == DataType::Type::kInt32) {
8509     const vixl32::Register first = InputRegisterAt(instruction, 0);
8510     const vixl32::Register output = OutputRegister(instruction);
8511     const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8512         ? LowRegisterFrom(locations->InAt(1))
8513         : InputRegisterAt(instruction, 1);
8514 
8515     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8516       DCHECK_EQ(kind, HInstruction::kAdd);
8517 
8518       switch (op_kind) {
8519         case HDataProcWithShifterOp::kUXTB:
8520           __ Uxtab(output, first, second);
8521           break;
8522         case HDataProcWithShifterOp::kUXTH:
8523           __ Uxtah(output, first, second);
8524           break;
8525         case HDataProcWithShifterOp::kSXTB:
8526           __ Sxtab(output, first, second);
8527           break;
8528         case HDataProcWithShifterOp::kSXTH:
8529           __ Sxtah(output, first, second);
8530           break;
8531         default:
8532           LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8533           UNREACHABLE();
8534       }
8535     } else {
8536       GenerateDataProcInstruction(kind,
8537                                   output,
8538                                   first,
8539                                   Operand(second,
8540                                           ShiftFromOpKind(op_kind),
8541                                           instruction->GetShiftAmount()),
8542                                   codegen_);
8543     }
8544   } else {
8545     DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8546 
8547     if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8548       const vixl32::Register second = InputRegisterAt(instruction, 1);
8549 
8550       DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8551       GenerateDataProc(kind,
8552                        locations->Out(),
8553                        locations->InAt(0),
8554                        second,
8555                        Operand(second, ShiftType::ASR, 31),
8556                        codegen_);
8557     } else {
8558       GenerateLongDataProc(instruction, codegen_);
8559     }
8560   }
8561 }
8562 
8563 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8564 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8565                                                        vixl32::Register first,
8566                                                        uint32_t value) {
8567   // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8568   if (value == 0xffffffffu) {
8569     if (!out.Is(first)) {
8570       __ Mov(out, first);
8571     }
8572     return;
8573   }
8574   if (value == 0u) {
8575     __ Mov(out, 0);
8576     return;
8577   }
8578   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8579     __ And(out, first, value);
8580   } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8581     __ Bic(out, first, ~value);
8582   } else {
8583     DCHECK(IsPowerOfTwo(value + 1));
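    // For example, value == 0x0001ffff is not encodable for AND and ~value == 0xfffe0000 is
    // not encodable for BIC, but value + 1 is a power of two, so UBFX extracts the low 17 bits.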
8584     __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8585   }
8586 }
8587 
8588 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8589 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8590                                                        vixl32::Register first,
8591                                                        uint32_t value) {
8592   // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8593   if (value == 0u) {
8594     if (!out.Is(first)) {
8595       __ Mov(out, first);
8596     }
8597     return;
8598   }
8599   if (value == 0xffffffffu) {
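    // ORR with all bits set always yields 0xffffffff, independently of `first`.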
8600     __ Mvn(out, 0);
8601     return;
8602   }
8603   if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8604     __ Orr(out, first, value);
8605   } else {
8606     DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8607     __ Orn(out, first, ~value);
8608   }
8609 }
8610 
8611 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8612 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8613                                                        vixl32::Register first,
8614                                                        uint32_t value) {
8615   // Optimize special case for individual halves of `xor-long` (`xor` is simplified earlier).
8616   if (value == 0u) {
8617     if (!out.Is(first)) {
8618       __ Mov(out, first);
8619     }
8620     return;
8621   }
8622   __ Eor(out, first, value);
8623 }
8624 
8625 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8626                                                            Location first,
8627                                                            uint64_t value) {
8628   vixl32::Register out_low = LowRegisterFrom(out);
8629   vixl32::Register out_high = HighRegisterFrom(out);
8630   vixl32::Register first_low = LowRegisterFrom(first);
8631   vixl32::Register first_high = HighRegisterFrom(first);
8632   uint32_t value_low = Low32Bits(value);
8633   uint32_t value_high = High32Bits(value);
8634   if (value_low == 0u) {
8635     if (!out_low.Is(first_low)) {
8636       __ Mov(out_low, first_low);
8637     }
8638     __ Add(out_high, first_high, value_high);
8639     return;
8640   }
8641   __ Adds(out_low, first_low, value_low);
8642   if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
8643     __ Adc(out_high, first_high, value_high);
8644   } else {
8645     DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
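    // SBC computes `first_high - ~value_high - 1 + carry`, which equals
    // `first_high + value_high + carry`, so it can stand in for ADC when only the
    // inverted constant is encodable (e.g. value_high == 0xfffffffe, ~value_high == 1).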
8646     __ Sbc(out_high, first_high, ~value_high);
8647   }
8648 }
8649 
8650 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
8651   LocationSummary* locations = instruction->GetLocations();
8652   Location first = locations->InAt(0);
8653   Location second = locations->InAt(1);
8654   Location out = locations->Out();
8655 
8656   if (second.IsConstant()) {
8657     uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
8658     uint32_t value_low = Low32Bits(value);
8659     if (instruction->GetResultType() == DataType::Type::kInt32) {
8660       vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8661       vixl32::Register out_reg = OutputRegister(instruction);
8662       if (instruction->IsAnd()) {
8663         GenerateAndConst(out_reg, first_reg, value_low);
8664       } else if (instruction->IsOr()) {
8665         GenerateOrrConst(out_reg, first_reg, value_low);
8666       } else {
8667         DCHECK(instruction->IsXor());
8668         GenerateEorConst(out_reg, first_reg, value_low);
8669       }
8670     } else {
8671       DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8672       uint32_t value_high = High32Bits(value);
8673       vixl32::Register first_low = LowRegisterFrom(first);
8674       vixl32::Register first_high = HighRegisterFrom(first);
8675       vixl32::Register out_low = LowRegisterFrom(out);
8676       vixl32::Register out_high = HighRegisterFrom(out);
8677       if (instruction->IsAnd()) {
8678         GenerateAndConst(out_low, first_low, value_low);
8679         GenerateAndConst(out_high, first_high, value_high);
8680       } else if (instruction->IsOr()) {
8681         GenerateOrrConst(out_low, first_low, value_low);
8682         GenerateOrrConst(out_high, first_high, value_high);
8683       } else {
8684         DCHECK(instruction->IsXor());
8685         GenerateEorConst(out_low, first_low, value_low);
8686         GenerateEorConst(out_high, first_high, value_high);
8687       }
8688     }
8689     return;
8690   }
8691 
8692   if (instruction->GetResultType() == DataType::Type::kInt32) {
8693     vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8694     vixl32::Register second_reg = InputRegisterAt(instruction, 1);
8695     vixl32::Register out_reg = OutputRegister(instruction);
8696     if (instruction->IsAnd()) {
8697       __ And(out_reg, first_reg, second_reg);
8698     } else if (instruction->IsOr()) {
8699       __ Orr(out_reg, first_reg, second_reg);
8700     } else {
8701       DCHECK(instruction->IsXor());
8702       __ Eor(out_reg, first_reg, second_reg);
8703     }
8704   } else {
8705     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8706     vixl32::Register first_low = LowRegisterFrom(first);
8707     vixl32::Register first_high = HighRegisterFrom(first);
8708     vixl32::Register second_low = LowRegisterFrom(second);
8709     vixl32::Register second_high = HighRegisterFrom(second);
8710     vixl32::Register out_low = LowRegisterFrom(out);
8711     vixl32::Register out_high = HighRegisterFrom(out);
8712     if (instruction->IsAnd()) {
8713       __ And(out_low, first_low, second_low);
8714       __ And(out_high, first_high, second_high);
8715     } else if (instruction->IsOr()) {
8716       __ Orr(out_low, first_low, second_low);
8717       __ Orr(out_high, first_high, second_high);
8718     } else {
8719       DCHECK(instruction->IsXor());
8720       __ Eor(out_low, first_low, second_low);
8721       __ Eor(out_high, first_high, second_high);
8722     }
8723   }
8724 }
8725 
8726 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
8727     HInstruction* instruction,
8728     Location out,
8729     uint32_t offset,
8730     Location maybe_temp,
8731     ReadBarrierOption read_barrier_option) {
8732   vixl32::Register out_reg = RegisterFrom(out);
8733   if (read_barrier_option == kWithReadBarrier) {
8734     CHECK(kEmitCompilerReadBarrier);
8735     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8736     if (kUseBakerReadBarrier) {
8737       // Load with fast path based Baker's read barrier.
8738       // /* HeapReference<Object> */ out = *(out + offset)
8739       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8740           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
8741     } else {
8742       // Load with slow path based read barrier.
8743       // Save the value of `out` into `maybe_temp` before overwriting it
8744       // in the following move operation, as we will need it for the
8745       // read barrier below.
8746       __ Mov(RegisterFrom(maybe_temp), out_reg);
8747       // /* HeapReference<Object> */ out = *(out + offset)
8748       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8749       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8750     }
8751   } else {
8752     // Plain load with no read barrier.
8753     // /* HeapReference<Object> */ out = *(out + offset)
8754     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8755     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8756   }
8757 }
8758 
8759 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
8760     HInstruction* instruction,
8761     Location out,
8762     Location obj,
8763     uint32_t offset,
8764     Location maybe_temp,
8765     ReadBarrierOption read_barrier_option) {
8766   vixl32::Register out_reg = RegisterFrom(out);
8767   vixl32::Register obj_reg = RegisterFrom(obj);
8768   if (read_barrier_option == kWithReadBarrier) {
8769     CHECK(kEmitCompilerReadBarrier);
8770     if (kUseBakerReadBarrier) {
8771       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8772       // Load with fast path based Baker's read barrier.
8773       // /* HeapReference<Object> */ out = *(obj + offset)
8774       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8775           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
8776     } else {
8777       // Load with slow path based read barrier.
8778       // /* HeapReference<Object> */ out = *(obj + offset)
8779       GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8780       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8781     }
8782   } else {
8783     // Plain load with no read barrier.
8784     // /* HeapReference<Object> */ out = *(obj + offset)
8785     GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8786     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8787   }
8788 }
8789 
8790 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
8791     HInstruction* instruction,
8792     Location root,
8793     vixl32::Register obj,
8794     uint32_t offset,
8795     ReadBarrierOption read_barrier_option) {
8796   vixl32::Register root_reg = RegisterFrom(root);
8797   if (read_barrier_option == kWithReadBarrier) {
8798     DCHECK(kEmitCompilerReadBarrier);
8799     if (kUseBakerReadBarrier) {
8800       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8801       // Baker's read barriers are used.
8802 
8803       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
8804       // the Marking Register) to decide whether we need to enter
8805       // the slow path to mark the GC root.
8806       //
8807       // We use shared thunks for the slow path; shared within the method
8808       // for JIT, across methods for AOT. That thunk checks the reference
8809       // and jumps to the entrypoint if needed.
8810       //
8811       //     lr = &return_address;
8812       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
8813       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
8814       //       goto gc_root_thunk<root_reg>(lr)
8815       //     }
8816       //   return_address:
8817 
8818       UseScratchRegisterScope temps(GetVIXLAssembler());
8819       temps.Exclude(ip);
8820       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
8821       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
8822 
8823       size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
8824       size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
8825       size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8826                           narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
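      // The entrypoint expects the LDR at a fixed offset from `return_address` (checked below),
      // so the emitted sequence must have an exact size; narrow and wide encodings are therefore
      // counted explicitly.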
8827       ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8828       vixl32::Label return_address;
8829       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8830       __ cmp(mr, Operand(0));
8831       // Currently the offset is always within range. If that changes,
8832       // we shall have to split the load the same way as for fields.
8833       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
8834       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8835       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
8836       EmitBakerReadBarrierBne(custom_data);
8837       __ bind(&return_address);
8838       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8839                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
8840                        : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
8841     } else {
8842       // GC root loaded through a slow path for read barriers other
8843       // than Baker's.
8844       // /* GcRoot<mirror::Object>* */ root = obj + offset
8845       __ Add(root_reg, obj, offset);
8846       // /* mirror::Object* */ root = root->Read()
8847       GenerateReadBarrierForRootSlow(instruction, root, root);
8848     }
8849   } else {
8850     // Plain GC root load with no read barrier.
8851     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
8852     GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
8853     // Note that GC roots are not affected by heap poisoning, thus we
8854     // do not have to unpoison `root_reg` here.
8855   }
8856   MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
8857 }
8858 
8859 void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier(
8860     vixl::aarch32::Register marked_old_value,
8861     vixl::aarch32::Register old_value) {
8862   DCHECK(kEmitCompilerReadBarrier);
8863   DCHECK(kUseBakerReadBarrier);
8864 
8865   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
8866   // For low registers, we can reuse the GC root narrow entrypoint, for high registers
8867   // we use a specialized entrypoint because the register bits are 8-11 instead of 12-15.
8868   bool narrow_mov = marked_old_value.IsLow();
8869   uint32_t custom_data = narrow_mov
8870       ? EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode(), /*narrow=*/ true)
8871       : EncodeBakerReadBarrierIntrinsicCasData(marked_old_value.GetCode());
8872 
8873   size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* MOV */ (narrow_mov ? 1u : 0u);
8874   size_t wide_instructions = /* ADR+CMP+MOV+BNE */ 4u - narrow_instructions;
8875   size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8876                       narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8877   ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8878   vixl32::Label return_address;
8879   EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8880   __ cmp(mr, Operand(0));
8881   ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8882   __ mov(EncodingSize(narrow_mov ? Narrow : Wide), marked_old_value, old_value);
8883   EmitBakerReadBarrierBne(custom_data);
8884   __ bind(&return_address);
8885   DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8886             narrow_mov
8887                 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
8888                 : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET);
8889 }
8890 
8891 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8892                                                                  Location ref,
8893                                                                  vixl32::Register obj,
8894                                                                  const vixl32::MemOperand& src,
8895                                                                  bool needs_null_check) {
8896   DCHECK(kEmitCompilerReadBarrier);
8897   DCHECK(kUseBakerReadBarrier);
8898 
8899   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
8900   // Marking Register) to decide whether we need to enter the slow
8901   // path to mark the reference. Then, in the slow path, check the
8902   // gray bit in the lock word of the reference's holder (`obj`) to
8903   // decide whether to mark `ref` or not.
8904   //
8905   // We use shared thunks for the slow path; shared within the method
8906   // for JIT, across methods for AOT. That thunk checks the holder
8907   // and jumps to the entrypoint if needed. If the holder is not gray,
8908   // it creates a fake dependency and returns to the LDR instruction.
8909   //
8910   //     lr = &gray_return_address;
8911   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
8912   //       goto field_thunk<holder_reg, base_reg>(lr)
8913   //     }
8914   //   not_gray_return_address:
8915   //     // Original reference load. If the offset is too large to fit
8916   //     // into LDR, we use an adjusted base register here.
8917   //     HeapReference<mirror::Object> reference = *(obj+offset);
8918   //   gray_return_address:
8919 
8920   DCHECK(src.GetAddrMode() == vixl32::Offset);
8921   DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
8922   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
8923   bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
8924 
8925   UseScratchRegisterScope temps(GetVIXLAssembler());
8926   temps.Exclude(ip);
8927   uint32_t custom_data =
8928       EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
8929 
8930   {
8931     size_t narrow_instructions =
8932         /* CMP */ (mr.IsLow() ? 1u : 0u) +
8933         /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
8934     size_t wide_instructions =
8935         /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
8936     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8937                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8938     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8939     vixl32::Label return_address;
8940     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8941     __ cmp(mr, Operand(0));
8942     EmitBakerReadBarrierBne(custom_data);
8943     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8944     __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
8945     if (needs_null_check) {
8946       MaybeRecordImplicitNullCheck(instruction);
8947     }
8948     // Note: We need a specific width for the unpoisoning NEG.
8949     if (kPoisonHeapReferences) {
8950       if (narrow) {
8951         // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
8952         __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
8953       } else {
8954         __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
8955       }
8956     }
8957     __ bind(&return_address);
8958     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8959               narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
8960                      : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
8961   }
8962   MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
8963 }
8964 
8965 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8966                                                                  Location ref,
8967                                                                  vixl32::Register obj,
8968                                                                  uint32_t offset,
8969                                                                  Location maybe_temp,
8970                                                                  bool needs_null_check) {
8971   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
8972   vixl32::Register base = obj;
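  // If the offset is too large for the LDR encoding expected by the Baker thunk, split it:
  // add the aligned high bits to a temporary base register and keep only the low bits as the
  // immediate offset of the load emitted below.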
8973   if (offset >= kReferenceLoadMinFarOffset) {
8974     base = RegisterFrom(maybe_temp);
8975     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
8976     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
8977     offset &= (kReferenceLoadMinFarOffset - 1u);
8978   }
8979   GenerateFieldLoadWithBakerReadBarrier(
8980       instruction, ref, obj, MemOperand(base, offset), needs_null_check);
8981 }
8982 
8983 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
8984                                                                  vixl32::Register obj,
8985                                                                  uint32_t data_offset,
8986                                                                  Location index,
8987                                                                  Location temp,
8988                                                                  bool needs_null_check) {
8989   DCHECK(kEmitCompilerReadBarrier);
8990   DCHECK(kUseBakerReadBarrier);
8991 
8992   static_assert(
8993       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8994       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8995   ScaleFactor scale_factor = TIMES_4;
8996 
8997   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
8998   // Marking Register) to decide whether we need to enter the slow
8999   // path to mark the reference. Then, in the slow path, check the
9000   // gray bit in the lock word of the reference's holder (`obj`) to
9001   // decide whether to mark `ref` or not.
9002   //
9003   // We use shared thunks for the slow path; shared within the method
9004   // for JIT, across methods for AOT. That thunk checks the holder
9005   // and jumps to the entrypoint if needed. If the holder is not gray,
9006   // it creates a fake dependency and returns to the LDR instruction.
9007   //
9008   //     lr = &gray_return_address;
9009   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
9010   //       goto array_thunk<base_reg>(lr)
9011   //     }
9012   //   not_gray_return_address:
9013   //     // Original reference load. If the offset is too large to fit
9014   //     // into LDR, we use an adjusted base register here.
9015   //     HeapReference<mirror::Object> reference = data[index];
9016   //   gray_return_address:
9017 
9018   DCHECK(index.IsValid());
9019   vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
9020   vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
9021   vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32);  // Raw pointer.
9022 
9023   UseScratchRegisterScope temps(GetVIXLAssembler());
9024   temps.Exclude(ip);
9025   uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
9026 
9027   __ Add(data_reg, obj, Operand(data_offset));
9028   {
9029     size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
9030     size_t wide_instructions =
9031         /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
9032     size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
9033                         narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
9034     ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
9035     vixl32::Label return_address;
9036     EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
9037     __ cmp(mr, Operand(0));
9038     EmitBakerReadBarrierBne(custom_data);
9039     ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
9040     __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
9041     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
9042     // Note: We need a Wide NEG for the unpoisoning.
9043     if (kPoisonHeapReferences) {
9044       __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
9045     }
9046     __ bind(&return_address);
9047     DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
9048               BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
9049   }
9050   MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
9051 }
9052 
9053 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
9054   // The following condition is a compile-time one, so it does not have a run-time cost.
9055   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
9056     // The following condition is a run-time one; it is executed after the
9057     // previous compile-time test, to avoid penalizing non-debug builds.
9058     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
9059       UseScratchRegisterScope temps(GetVIXLAssembler());
9060       vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
9061       GetAssembler()->GenerateMarkingRegisterCheck(temp,
9062                                                    kMarkingRegisterCheckBreakCodeBaseCode + code);
9063     }
9064   }
9065 }
9066 
9067 SlowPathCodeARMVIXL* CodeGeneratorARMVIXL::AddReadBarrierSlowPath(HInstruction* instruction,
9068                                                                   Location out,
9069                                                                   Location ref,
9070                                                                   Location obj,
9071                                                                   uint32_t offset,
9072                                                                   Location index) {
9073   SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
9074       ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
9075   AddSlowPath(slow_path);
9076   return slow_path;
9077 }
9078 
9079 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
9080                                                    Location out,
9081                                                    Location ref,
9082                                                    Location obj,
9083                                                    uint32_t offset,
9084                                                    Location index) {
9085   DCHECK(kEmitCompilerReadBarrier);
9086 
9087   // Insert a slow path based read barrier *after* the reference load.
9088   //
9089   // If heap poisoning is enabled, the unpoisoning of the loaded
9090   // reference will be carried out by the runtime within the slow
9091   // path.
9092   //
9093   // Note that `ref` currently does not get unpoisoned (when heap
9094   // poisoning is enabled), which is alright as the `ref` argument is
9095   // not used by the artReadBarrierSlow entry point.
9096   //
9097   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
9098   SlowPathCodeARMVIXL* slow_path =
9099       AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
9100 
9101   __ B(slow_path->GetEntryLabel());
9102   __ Bind(slow_path->GetExitLabel());
9103 }
9104 
9105 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
9106                                                         Location out,
9107                                                         Location ref,
9108                                                         Location obj,
9109                                                         uint32_t offset,
9110                                                         Location index) {
9111   if (kEmitCompilerReadBarrier) {
9112     // Baker's read barriers shall be handled by the fast path
9113     // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9114     DCHECK(!kUseBakerReadBarrier);
9115     // If heap poisoning is enabled, unpoisoning will be taken care of
9116     // by the runtime within the slow path.
9117     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9118   } else if (kPoisonHeapReferences) {
9119     GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9120   }
9121 }
9122 
9123 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9124                                                           Location out,
9125                                                           Location root) {
9126   DCHECK(kEmitCompilerReadBarrier);
9127 
9128   // Insert a slow path based read barrier *after* the GC root load.
9129   //
9130   // Note that GC roots are not affected by heap poisoning, so we do
9131   // not need to do anything special for this here.
9132   SlowPathCodeARMVIXL* slow_path =
9133       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9134   AddSlowPath(slow_path);
9135 
9136   __ B(slow_path->GetEntryLabel());
9137   __ Bind(slow_path->GetExitLabel());
9138 }
9139 
9140 // Check if the desired_dispatch_info is supported. If it is, return it,
9141 // otherwise return a fall-back info that should be used instead.
9142 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9143     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9144     ArtMethod* method) {
9145   if (method->IsIntrinsic() &&
9146       desired_dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative) {
9147     // As a work-around for soft-float native ABI interfering with type checks, we are
9148     // inserting fake calls to Float.floatToRawIntBits() or Double.doubleToRawLongBits()
9149     // when a float or double argument is passed in core registers but we cannot do that
9150     // for actual intrinsic implementations that expect them in FP registers. Therefore
9151     // we do not use `kCallCriticalNative` for intrinsics with FP arguments; if they are
9152     // properly intrinsified, the dispatch type does not matter anyway.
9153     ScopedObjectAccess soa(Thread::Current());
9154     uint32_t shorty_len;
9155     const char* shorty = method->GetShorty(&shorty_len);
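    // The shorty starts with the return type, so the argument types are at indexes
    // 1 .. shorty_len - 1.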
9156     for (uint32_t i = 1; i != shorty_len; ++i) {
9157       if (shorty[i] == 'D' || shorty[i] == 'F') {
9158         HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9159         dispatch_info.code_ptr_location = CodePtrLocation::kCallArtMethod;
9160         return dispatch_info;
9161       }
9162     }
9163   }
9164   return desired_dispatch_info;
9165 }
9166 
9167 
9168 void CodeGeneratorARMVIXL::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
9169   switch (load_kind) {
9170     case MethodLoadKind::kBootImageLinkTimePcRelative: {
9171       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9172       PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
9173       vixl32::Register temp_reg = RegisterFrom(temp);
9174       EmitMovwMovtPlaceholder(labels, temp_reg);
9175       break;
9176     }
9177     case MethodLoadKind::kBootImageRelRo: {
9178       uint32_t boot_image_offset = GetBootImageOffset(invoke);
9179       PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9180       vixl32::Register temp_reg = RegisterFrom(temp);
9181       EmitMovwMovtPlaceholder(labels, temp_reg);
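      // The MOVW/MOVT placeholder materializes the address of the boot image relocation entry;
      // the load below then reads the method pointer stored there.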
9182       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9183       break;
9184     }
9185     case MethodLoadKind::kBssEntry: {
9186       PcRelativePatchInfo* labels = NewMethodBssEntryPatch(invoke->GetMethodReference());
9187       vixl32::Register temp_reg = RegisterFrom(temp);
9188       EmitMovwMovtPlaceholder(labels, temp_reg);
9189       // All aligned loads are implicitly atomic consume operations on ARM.
9190       GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9191       break;
9192     }
9193     case MethodLoadKind::kJitDirectAddress: {
9194       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetResolvedMethod()));
9195       break;
9196     }
9197     case MethodLoadKind::kRuntimeCall: {
9198       // Test situation, don't do anything.
9199       break;
9200     }
9201     default: {
9202       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
9203       UNREACHABLE();
9204     }
9205   }
9206 }
9207 
9208 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9209     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9210   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
9211   switch (invoke->GetMethodLoadKind()) {
9212     case MethodLoadKind::kStringInit: {
9213       uint32_t offset =
9214           GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9215       // temp = thread->string_init_entrypoint
9216       GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9217       break;
9218     }
9219     case MethodLoadKind::kRecursive: {
9220       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9221       break;
9222     }
9223     case MethodLoadKind::kRuntimeCall: {
9224       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9225       return;  // No code pointer retrieval; the runtime performs the call directly.
9226     }
9227     case MethodLoadKind::kBootImageLinkTimePcRelative:
9228       // Note: Unlike arm64, x86 and x86-64, we do not avoid the materialization of method
9229       // pointer for kCallCriticalNative because it would not save us an instruction from
9230       // the current sequence MOVW+MOVT+ADD(pc)+LDR+BL. The ADD(pc) separates the patched
9231       // offset instructions MOVW+MOVT from the entrypoint load, so they cannot be fused.
9232       FALLTHROUGH_INTENDED;
9233     default: {
9234       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
9235       break;
9236     }
9237   }
9238 
9239   auto call_code_pointer_member = [&](MemberOffset offset) {
9240     // LR = callee_method->member;
9241     GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9242     {
9243       // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9244       // blx in T32 has only a 16-bit encoding, which is why a stricter size check is used for the scope.
9245       ExactAssemblyScope aas(GetVIXLAssembler(),
9246                              vixl32::k16BitT32InstructionSizeInBytes,
9247                              CodeBufferCheckScope::kExactSize);
9248       // LR()
9249       __ blx(lr);
9250       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9251     }
9252   };
9253   switch (invoke->GetCodePtrLocation()) {
9254     case CodePtrLocation::kCallSelf:
9255       {
9256         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9257         ExactAssemblyScope aas(GetVIXLAssembler(),
9258                                vixl32::k32BitT32InstructionSizeInBytes,
9259                                CodeBufferCheckScope::kMaximumSize);
9260         __ bl(GetFrameEntryLabel());
9261         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9262       }
9263       break;
9264     case CodePtrLocation::kCallCriticalNative: {
9265       size_t out_frame_size =
9266           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9267                                     kAapcsStackAlignment,
9268                                     GetCriticalNativeDirectCallFrameSize>(invoke);
9269       call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9270       // Move the result when needed due to native and managed ABI mismatch.
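      // Under the native softfp ABI, FP results come back in core registers (r0 or r0/r1),
      // while the managed ABI expects them in s0/d0, hence the VMOVs below.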
9271       switch (invoke->GetType()) {
9272         case DataType::Type::kFloat32:
9273           __ Vmov(s0, r0);
9274           break;
9275         case DataType::Type::kFloat64:
9276           __ Vmov(d0, r0, r1);
9277           break;
9278         case DataType::Type::kBool:
9279         case DataType::Type::kInt8:
9280         case DataType::Type::kUint16:
9281         case DataType::Type::kInt16:
9282         case DataType::Type::kInt32:
9283         case DataType::Type::kInt64:
9284         case DataType::Type::kVoid:
9285           break;
9286         default:
9287           DCHECK(false) << invoke->GetType();
9288           break;
9289       }
9290       if (out_frame_size != 0u) {
9291         DecreaseFrame(out_frame_size);
9292       }
9293       break;
9294     }
9295     case CodePtrLocation::kCallArtMethod:
9296       call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9297       break;
9298   }
9299 
9300   DCHECK(!IsLeafMethod());
9301 }
9302 
9303 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9304     HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9305   vixl32::Register temp = RegisterFrom(temp_location);
9306   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9307       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9308 
9309   // Use the calling convention instead of the location of the receiver, as
9310   // intrinsics may have put the receiver in a different register. In the intrinsics
9311   // slow path, the arguments have been moved to the right place, so here we are
9312   // guaranteed that the receiver is in the first register of the calling convention.
9313   InvokeDexCallingConventionARMVIXL calling_convention;
9314   vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9315   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9316   {
9317     // Make sure the pc is recorded immediately after the `ldr` instruction.
9318     ExactAssemblyScope aas(GetVIXLAssembler(),
9319                            vixl32::kMaxInstructionSizeInBytes,
9320                            CodeBufferCheckScope::kMaximumSize);
9321     // /* HeapReference<Class> */ temp = receiver->klass_
9322     __ ldr(temp, MemOperand(receiver, class_offset));
9323     MaybeRecordImplicitNullCheck(invoke);
9324   }
9325   // Instead of simply (possibly) unpoisoning `temp` here, we should
9326   // emit a read barrier for the previous class reference load.
9327   // However, this is not required in practice: the reference is only
9328   // an intermediate/temporary value, and the current concurrent
9329   // copying collector keeps the from-space memory intact/accessible
9330   // until the end of the marking phase (although future collectors
9331   // may not).
9332   GetAssembler()->MaybeUnpoisonHeapReference(temp);
9333 
9334   // If we're compiling baseline, update the inline cache.
9335   MaybeGenerateInlineCacheCheck(invoke, temp);
9336 
9337   // temp = temp->GetMethodAt(method_offset);
9338   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9339       kArmPointerSize).Int32Value();
9340   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9341   // LR = temp->GetEntryPoint();
9342   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9343   {
9344     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9345     // blx in T32 has only a 16-bit encoding, which is why a stricter size check is used for the scope.
9346     ExactAssemblyScope aas(GetVIXLAssembler(),
9347                            vixl32::k16BitT32InstructionSizeInBytes,
9348                            CodeBufferCheckScope::kExactSize);
9349     // LR();
9350     __ blx(lr);
9351     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9352   }
9353 }
9354 
9355 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9356     uint32_t intrinsic_data) {
9357   return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9358 }
9359 
9360 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9361     uint32_t boot_image_offset) {
9362   return NewPcRelativePatch(/* dex_file= */ nullptr,
9363                             boot_image_offset,
9364                             &boot_image_other_patches_);
9365 }
9366 
9367 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9368     MethodReference target_method) {
9369   return NewPcRelativePatch(
9370       target_method.dex_file, target_method.index, &boot_image_method_patches_);
9371 }
9372 
9373 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9374     MethodReference target_method) {
9375   return NewPcRelativePatch(
9376       target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9377 }
9378 
9379 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9380     const DexFile& dex_file, dex::TypeIndex type_index) {
9381   return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9382 }
9383 
9384 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9385     HLoadClass* load_class) {
9386   const DexFile& dex_file = load_class->GetDexFile();
9387   dex::TypeIndex type_index = load_class->GetTypeIndex();
9388   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
9389   switch (load_class->GetLoadKind()) {
9390     case HLoadClass::LoadKind::kBssEntry:
9391       patches = &type_bss_entry_patches_;
9392       break;
9393     case HLoadClass::LoadKind::kBssEntryPublic:
9394       patches = &public_type_bss_entry_patches_;
9395       break;
9396     case HLoadClass::LoadKind::kBssEntryPackage:
9397       patches = &package_type_bss_entry_patches_;
9398       break;
9399     default:
9400       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
9401       UNREACHABLE();
9402   }
9403   return NewPcRelativePatch(&dex_file, type_index.index_, patches);
9404 }
9405 
9406 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9407     const DexFile& dex_file, dex::StringIndex string_index) {
9408   return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9409 }
9410 
9411 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9412     const DexFile& dex_file, dex::StringIndex string_index) {
9413   return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9414 }
9415 
9416 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9417     const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9418   patches->emplace_back(dex_file, offset_or_index);
9419   return &patches->back();
9420 }
9421 
9422 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9423   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9424   DCHECK(!GetCompilerOptions().IsJitCompiler());
9425   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9426   vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9427   __ bind(bl_label);
9428   vixl32::Label placeholder_label;
9429   __ bl(&placeholder_label);  // Placeholder, patched at link-time.
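  // The label is bound immediately after the BL, giving the placeholder a fixed,
  // well-defined encoding for the linker to rewrite.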
9430   __ bind(&placeholder_label);
9431 }
9432 
9433 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9434   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
9435   if (GetCompilerOptions().IsJitCompiler()) {
9436     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9437     vixl::aarch32::Label* slow_path_entry = &it->second.label;
9438     __ b(ne, EncodingSize(Wide), slow_path_entry);
9439   } else {
9440     baker_read_barrier_patches_.emplace_back(custom_data);
9441     vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9442     __ bind(patch_label);
9443     vixl32::Label placeholder_label;
9444     __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
9445     __ bind(&placeholder_label);
9446   }
9447 }
9448 
9449 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9450   return DeduplicateUint32Literal(address, &uint32_literals_);
9451 }
9452 
9453 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9454     const DexFile& dex_file,
9455     dex::StringIndex string_index,
9456     Handle<mirror::String> handle) {
9457   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9458   return jit_string_patches_.GetOrCreate(
9459       StringReference(&dex_file, string_index),
9460       [this]() {
9461         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9462       });
9463 }
9464 
9465 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9466                                                       dex::TypeIndex type_index,
9467                                                       Handle<mirror::Class> handle) {
9468   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9469   return jit_class_patches_.GetOrCreate(
9470       TypeReference(&dex_file, type_index),
9471       [this]() {
9472         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9473       });
9474 }
9475 
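// Load a boot image address into `reg` using one of three strategies: when compiling the boot
// image itself, a MOVW/MOVT/ADD-PC placeholder patched at link time; for other PIC-compiled AOT
// code, the same placeholder followed by an LDR through the .data.bimg.rel.ro entry; for JIT,
// a literal holding the absolute address computed from the loaded boot image spaces.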
9476 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9477                                                 uint32_t boot_image_reference) {
9478   if (GetCompilerOptions().IsBootImage()) {
9479     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9480         NewBootImageIntrinsicPatch(boot_image_reference);
9481     EmitMovwMovtPlaceholder(labels, reg);
9482   } else if (GetCompilerOptions().GetCompilePic()) {
9483     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9484         NewBootImageRelRoPatch(boot_image_reference);
9485     EmitMovwMovtPlaceholder(labels, reg);
9486     __ Ldr(reg, MemOperand(reg, /* offset= */ 0));
9487   } else {
9488     DCHECK(GetCompilerOptions().IsJitCompiler());
9489     gc::Heap* heap = Runtime::Current()->GetHeap();
9490     DCHECK(!heap->GetBootImageSpaces().empty());
9491     uintptr_t address =
9492         reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9493     __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9494   }
9495 }
9496 
9497 void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
9498                                                          TypeReference target_type) {
9499   // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9500   DCHECK(GetCompilerOptions().IsBootImage());
9501   PcRelativePatchInfo* labels =
9502       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
9503   EmitMovwMovtPlaceholder(labels, reg);
9504 }
9505 
9506 void CodeGeneratorARMVIXL::LoadIntrinsicDeclaringClass(vixl32::Register reg, HInvoke* invoke) {
9507   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
9508   if (GetCompilerOptions().IsBootImage()) {
9509     MethodReference target_method = invoke->GetResolvedMethodReference();
9510     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9511     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
9512   } else {
9513     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
9514     LoadBootImageAddress(reg, boot_image_offset);
9515   }
9516 }
9517 
9518 void CodeGeneratorARMVIXL::LoadClassRootForIntrinsic(vixl::aarch32::Register reg,
9519                                                      ClassRoot class_root) {
9520   if (GetCompilerOptions().IsBootImage()) {
9521     ScopedObjectAccess soa(Thread::Current());
9522     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
9523     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
9524     LoadTypeForBootImageIntrinsic(reg, target_type);
9525   } else {
9526     uint32_t boot_image_offset = GetBootImageOffset(class_root);
9527     LoadBootImageAddress(reg, boot_image_offset);
9528   }
9529 }
9530 
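// Each PC-relative reference is reported to the linker as two patches, one at the MOVW and one at
// the MOVT, and both carry the location of the shared ADD-PC instruction so that the displacement
// is computed against the same anchor.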
9531 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
9532 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9533     const ArenaDeque<PcRelativePatchInfo>& infos,
9534     ArenaVector<linker::LinkerPatch>* linker_patches) {
9535   for (const PcRelativePatchInfo& info : infos) {
9536     const DexFile* dex_file = info.target_dex_file;
9537     size_t offset_or_index = info.offset_or_index;
9538     DCHECK(info.add_pc_label.IsBound());
9539     uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9540     // Add MOVW patch.
9541     DCHECK(info.movw_label.IsBound());
9542     uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9543     linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9544     // Add MOVT patch.
9545     DCHECK(info.movt_label.IsBound());
9546     uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9547     linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9548   }
9549 }
9550 
9551 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
9552 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9553                                      const DexFile* target_dex_file,
9554                                      uint32_t pc_insn_offset,
9555                                      uint32_t boot_image_offset) {
9556   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
9557   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9558 }
9559 
9560 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9561   DCHECK(linker_patches->empty());
9562   size_t size =
9563       /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9564       /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9565       /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9566       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9567       /* MOVW+MOVT for each entry */ 2u * public_type_bss_entry_patches_.size() +
9568       /* MOVW+MOVT for each entry */ 2u * package_type_bss_entry_patches_.size() +
9569       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9570       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9571       /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9572       call_entrypoint_patches_.size() +
9573       baker_read_barrier_patches_.size();
9574   linker_patches->reserve(size);
9575   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9576     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9577         boot_image_method_patches_, linker_patches);
9578     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9579         boot_image_type_patches_, linker_patches);
9580     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9581         boot_image_string_patches_, linker_patches);
9582   } else {
9583     DCHECK(boot_image_method_patches_.empty());
9584     DCHECK(boot_image_type_patches_.empty());
9585     DCHECK(boot_image_string_patches_.empty());
9586   }
9587   if (GetCompilerOptions().IsBootImage()) {
9588     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9589         boot_image_other_patches_, linker_patches);
9590   } else {
9591     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
9592         boot_image_other_patches_, linker_patches);
9593   }
9594   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9595       method_bss_entry_patches_, linker_patches);
9596   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9597       type_bss_entry_patches_, linker_patches);
9598   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
9599       public_type_bss_entry_patches_, linker_patches);
9600   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
9601       package_type_bss_entry_patches_, linker_patches);
9602   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9603       string_bss_entry_patches_, linker_patches);
9604   for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9605     DCHECK(info.target_dex_file == nullptr);
9606     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9607         info.label.GetLocation(), info.offset_or_index));
9608   }
9609   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9610     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
9611         info.label.GetLocation(), info.custom_data));
9612   }
9613   DCHECK_EQ(size, linker_patches->size());
9614 }
9615 
9616 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
9617   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
9618          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
9619          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
9620 }
9621 
9622 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
9623                                          /*out*/ ArenaVector<uint8_t>* code,
9624                                          /*out*/ std::string* debug_name) {
9625   arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
9626   switch (patch.GetType()) {
9627     case linker::LinkerPatch::Type::kCallRelative: {
9628       // The thunk just uses the entry point in the ArtMethod. This works even for calls
9629       // to the generic JNI and interpreter trampolines.
9630       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
9631       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
9632       assembler.GetVIXLAssembler()->Bkpt(0);
9633       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9634         *debug_name = "MethodCallThunk";
9635       }
9636       break;
9637     }
9638     case linker::LinkerPatch::Type::kCallEntrypoint: {
9639       assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
9640       assembler.GetVIXLAssembler()->Bkpt(0);
9641       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9642         *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
9643       }
9644       break;
9645     }
9646     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
9647       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
9648       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
9649       break;
9650     }
9651     default:
9652       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
9653       UNREACHABLE();
9654   }
9655 
9656   // Ensure we emit the literal pool if any.
9657   assembler.FinalizeCode();
9658   code->resize(assembler.CodeSize());
9659   MemoryRegion code_region(code->data(), code->size());
9660   assembler.FinalizeInstructions(code_region);
9661 }
9662 
9663 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
9664     uint32_t value,
9665     Uint32ToLiteralMap* map) {
9666   return map->GetOrCreate(
9667       value,
9668       [this, value]() {
9669         return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
9670       });
9671 }
9672 
9673 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9674   LocationSummary* locations =
9675       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
9676   locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
9677                      Location::RequiresRegister());
9678   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
9679   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
9680   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
9681 }
9682 
9683 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9684   vixl32::Register res = OutputRegister(instr);
9685   vixl32::Register accumulator =
9686       InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
9687   vixl32::Register mul_left =
9688       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
9689   vixl32::Register mul_right =
9690       InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
9691 
9692   if (instr->GetOpKind() == HInstruction::kAdd) {
9693     __ Mla(res, mul_left, mul_right, accumulator);
9694   } else {
9695     __ Mls(res, mul_left, mul_right, accumulator);
9696   }
9697 }
9698 
9699 void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9700   // Nothing to do, this should be removed during prepare for register allocator.
9701   LOG(FATAL) << "Unreachable";
9702 }
9703 
9704 void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9705   // Nothing to do, this should be removed during prepare for register allocator.
9706   LOG(FATAL) << "Unreachable";
9707 }
9708 
9709 // Simple implementation of packed switch - generate cascaded compare/jumps.
9710 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9711   LocationSummary* locations =
9712       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
9713   locations->SetInAt(0, Location::RequiresRegister());
9714   if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
9715       codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9716     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
9717     if (switch_instr->GetStartValue() != 0) {
9718       locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
9719     }
9720   }
9721 }
9722 
9723 // TODO(VIXL): Investigate and reach parity with the old ARM codegen.
9724 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9725   int32_t lower_bound = switch_instr->GetStartValue();
9726   uint32_t num_entries = switch_instr->GetNumEntries();
9727   LocationSummary* locations = switch_instr->GetLocations();
9728   vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
9729   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
9730 
9731   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
9732       !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9733     // Create a series of compare/jumps.
9734     UseScratchRegisterScope temps(GetVIXLAssembler());
9735     vixl32::Register temp_reg = temps.Acquire();
9736     // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
9737     // store the immediate, because IP is also the destination register. For the other
9738     // AddConstantSetFlags() and GenerateCompareWithImmediate() uses, the immediate values are
9739     // constant and can be encoded in the instruction without needing the IP register.
9740     __ Adds(temp_reg, value_reg, -lower_bound);
9741 
9742     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
9743     // Jump to successors[0] if value == lower_bound.
9744     __ B(eq, codegen_->GetLabelOf(successors[0]));
9745     int32_t last_index = 0;
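    // Each loop iteration below dispatches the next two contiguous case values off a single
    // flag-setting subtraction of 2: LO selects successors[last_index + 1] and EQ selects
    // successors[last_index + 2].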
9746     for (; num_entries - last_index > 2; last_index += 2) {
9747       __ Adds(temp_reg, temp_reg, -2);
9748       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
9749       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
9750       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
9751       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
9752     }
9753     if (num_entries - last_index == 2) {
9754       // The last missing case_value.
9755       __ Cmp(temp_reg, 1);
9756       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
9757     }
9758 
9759     // And the default for any other value.
9760     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
9761       __ B(codegen_->GetLabelOf(default_block));
9762     }
9763   } else {
9764     // Create a table lookup.
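    // The sequence emitted below is, in substance:
    //   adr  table_base, <jump table start>
    //   ldr  jump_offset, [table_base, key_reg, lsl #2]
    //   add  target, table_base, jump_offset
    //   bx   target
    // with each 32-bit table entry holding its target block's offset from the table start.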
9765     vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
9766 
9767     JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
9768 
9769     // Remove the bias.
9770     vixl32::Register key_reg;
9771     if (lower_bound != 0) {
9772       key_reg = RegisterFrom(locations->GetTemp(1));
9773       __ Sub(key_reg, value_reg, lower_bound);
9774     } else {
9775       key_reg = value_reg;
9776     }
9777 
9778     // Check whether the value is in the table, jump to default block if not.
9779     __ Cmp(key_reg, num_entries - 1);
9780     __ B(hi, codegen_->GetLabelOf(default_block));
9781 
9782     UseScratchRegisterScope temps(GetVIXLAssembler());
9783     vixl32::Register jump_offset = temps.Acquire();
9784 
9785     // Load jump offset from the table.
9786     {
9787       const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
9788       ExactAssemblyScope aas(GetVIXLAssembler(),
9789                              (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
9790                              CodeBufferCheckScope::kMaximumSize);
9791       __ adr(table_base, jump_table->GetTableStartLabel());
9792       __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
9793 
9794       // Jump to the target block by branching to table_base (PC-relative) + offset.
9795       vixl32::Register target_address = table_base;
9796       __ add(target_address, table_base, jump_offset);
9797       __ bx(target_address);
9798 
9799       jump_table->EmitTable(codegen_);
9800     }
9801   }
9802 }
9803 
9804 // Copy the result of a call into the given target.
9805 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
9806   if (!trg.IsValid()) {
9807     DCHECK_EQ(type, DataType::Type::kVoid);
9808     return;
9809   }
9810 
9811   DCHECK_NE(type, DataType::Type::kVoid);
9812 
9813   Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
9814   if (return_loc.Equals(trg)) {
9815     return;
9816   }
9817 
9818   // Let the parallel move resolver take care of all of this.
9819   HParallelMove parallel_move(GetGraph()->GetAllocator());
9820   parallel_move.AddMove(return_loc, trg, type, nullptr);
9821   GetMoveResolver()->EmitNativeCode(&parallel_move);
9822 }
9823 
9824 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
9825   LocationSummary* locations =
9826       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
9827   locations->SetInAt(0, Location::RequiresRegister());
9828   locations->SetOut(Location::RequiresRegister());
9829 }
9830 
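// For the vtable case, a single load suffices because embedded vtable entries are stored directly
// in the Class object. The IMT case first loads the ImTable pointer from the Class and then the
// requested entry from that table.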
9831 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
9832   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
9833     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9834         instruction->GetIndex(), kArmPointerSize).SizeValue();
9835     GetAssembler()->LoadFromOffset(kLoadWord,
9836                                    OutputRegister(instruction),
9837                                    InputRegisterAt(instruction, 0),
9838                                    method_offset);
9839   } else {
9840     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
9841         instruction->GetIndex(), kArmPointerSize));
9842     GetAssembler()->LoadFromOffset(kLoadWord,
9843                                    OutputRegister(instruction),
9844                                    InputRegisterAt(instruction, 0),
9845                                    mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
9846     GetAssembler()->LoadFromOffset(kLoadWord,
9847                                    OutputRegister(instruction),
9848                                    OutputRegister(instruction),
9849                                    method_offset);
9850   }
9851 }
9852 
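// Overwrite the bound literal's 32-bit slot in the emitted code with the address of the root's
// entry in the JIT root table; the code that loads through this literal then reads the GC root
// from that table entry.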
9853 static void PatchJitRootUse(uint8_t* code,
9854                             const uint8_t* roots_data,
9855                             VIXLUInt32Literal* literal,
9856                             uint64_t index_in_table) {
9857   DCHECK(literal->IsBound());
9858   uint32_t literal_offset = literal->GetLocation();
9859   uintptr_t address =
9860       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9861   uint8_t* data = code + literal_offset;
9862   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
9863 }
9864 
9865 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9866   for (const auto& entry : jit_string_patches_) {
9867     const StringReference& string_reference = entry.first;
9868     VIXLUInt32Literal* table_entry_literal = entry.second;
9869     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9870     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
9871   }
9872   for (const auto& entry : jit_class_patches_) {
9873     const TypeReference& type_reference = entry.first;
9874     VIXLUInt32Literal* table_entry_literal = entry.second;
9875     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9876     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
9877   }
9878 }
9879 
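// Emit the MOVW/MOVT/ADD-PC placeholder for a PC-relative reference. The linker is expected to
// fill the MOVW and MOVT with the low and high half-words of the displacement from the ADD's PC
// to the target, so that `out` ends up holding the target address (or the address of the
// corresponding .bss or .data.bimg.rel.ro entry).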
9880 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
9881     CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
9882     vixl32::Register out) {
9883   ExactAssemblyScope aas(GetVIXLAssembler(),
9884                          3 * vixl32::kMaxInstructionSizeInBytes,
9885                          CodeBufferCheckScope::kMaximumSize);
9886   // TODO(VIXL): Think about using mov instead of movw.
9887   __ bind(&labels->movw_label);
9888   __ movw(out, /* operand= */ 0u);
9889   __ bind(&labels->movt_label);
9890   __ movt(out, /* operand= */ 0u);
9891   __ bind(&labels->add_pc_label);
9892   __ add(out, out, pc);
9893 }
9894 
9895 #undef __
9896 #undef QUICK_ENTRY_POINT
9897 #undef TODO_VIXL32
9898 
9899 #define __ assembler.GetVIXLAssembler()->
9900 
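// Shared fast path for the field and array Baker read barrier thunks: branch to `slow_path` if
// the holder's lock word has the gray bit set; otherwise add a fake data dependency on the lock
// word to the base register and return to the reference-loading LDR in the compiled code
// (LR adjusted by `raw_ldr_offset`) so the load executes with that dependency in place.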
9901 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
9902                                      vixl32::Register base_reg,
9903                                      vixl32::MemOperand& lock_word,
9904                                      vixl32::Label* slow_path,
9905                                      int32_t raw_ldr_offset,
9906                                      vixl32::Label* throw_npe = nullptr) {
9907   // Load the lock word containing the rb_state.
9908   __ Ldr(ip, lock_word);
9909   // Given the numeric representation, it's enough to check the low bit of the rb_state.
9910   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
9911   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
9912   __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
9913   __ B(ne, slow_path, /* is_far_target= */ false);
9914   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
9915   if (throw_npe != nullptr) {
9916     __ Bind(throw_npe);
9917   }
9918   __ Add(lr, lr, raw_ldr_offset);
9919   // Introduce a dependency on the lock_word including rb_state,
9920   // to prevent load-load reordering, and without using
9921   // a memory barrier (which would be more expensive).
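  // (The shift LSR #32 yields zero, so the ADD leaves `base_reg` unchanged while still consuming
  // the freshly loaded lock word in `ip`.)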
9922   __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
9923   __ Bx(lr);          // And return back to the function.
9924   // Note: The fake dependency is unnecessary for the slow path.
9925 }
9926 
9927 // Load the read barrier introspection entrypoint into register `entrypoint`.
9928 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
9929   // The register where the read barrier introspection entrypoint is loaded
9930   // is the marking register. We clobber it here and the entrypoint restores it to 1.
9931   vixl32::Register entrypoint = mr;
9932   // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
9933   DCHECK_EQ(ip.GetCode(), 12u);
9934   const int32_t entry_point_offset =
9935       Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
9936   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
9937   return entrypoint;
9938 }
9939 
9940 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
9941                                                         uint32_t encoded_data,
9942                                                         /*out*/ std::string* debug_name) {
9943   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
9944   switch (kind) {
9945     case BakerReadBarrierKind::kField: {
9946       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
9947       CheckValidReg(base_reg.GetCode());
9948       vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
9949       CheckValidReg(holder_reg.GetCode());
9950       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
9951       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
9952       temps.Exclude(ip);
9953       // In the case of a field load, if `base_reg` differs from
9954       // `holder_reg`, the offset was too large and we must have emitted (during the construction
9955       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
9956       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
9957       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
9958       // not necessarily do that check before going to the thunk.
9959       vixl32::Label throw_npe_label;
9960       vixl32::Label* throw_npe = nullptr;
9961       if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
9962         throw_npe = &throw_npe_label;
9963         __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
9964       }
9965       // Check if the holder is gray and, if not, add fake dependency to the base register
9966       // and return to the LDR instruction to load the reference. Otherwise, use introspection
9967       // to load the reference and call the entrypoint that performs further checks on the
9968       // reference and marks it if needed.
9969       vixl32::Label slow_path;
9970       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
9971       const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
9972           ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
9973           : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
9974       EmitGrayCheckAndFastPath(
9975           assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
9976       __ Bind(&slow_path);
9977       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
9978                                  raw_ldr_offset;
9979       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
9980       if (width == BakerReadBarrierWidth::kWide) {
9981         MemOperand ldr_half_address(lr, ldr_offset + 2);
9982         __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
9983         __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
9984         __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
9985       } else {
9986         MemOperand ldr_address(lr, ldr_offset);
9987         __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
9988         __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
9989                ep_reg,                        // for narrow LDR.
9990                Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
9991         __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
9992         __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
9993       }
9994       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
9995       __ Bx(ep_reg);                          // Jump to the entrypoint.
9996       break;
9997     }
9998     case BakerReadBarrierKind::kArray: {
9999       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10000       CheckValidReg(base_reg.GetCode());
10001       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10002                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10003       DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10004       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10005       temps.Exclude(ip);
10006       vixl32::Label slow_path;
10007       int32_t data_offset =
10008           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
10009       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
10010       DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
10011       const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
10012       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
10013       __ Bind(&slow_path);
10014       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
10015                                  raw_ldr_offset;
10016       MemOperand ldr_address(lr, ldr_offset + 2);
10017       __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
10018                                               // i.e. Rm+32 because the scale in imm2 is 2.
10019       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10020       __ Bfi(ep_reg, ip, 3, 6);               // Insert ip to the entrypoint address to create
10021                                               // a switch case target based on the index register.
10022       __ Mov(ip, base_reg);                   // Move the base register to IP.
10023       __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
10024       break;
10025     }
10026     case BakerReadBarrierKind::kGcRoot:
10027     case BakerReadBarrierKind::kIntrinsicCas: {
10028       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
10029       // and it does not have a forwarding address), call the correct introspection entrypoint;
10030       // otherwise return the reference (or the extracted forwarding address).
10031       // There is no gray bit check for GC roots.
10032       vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
10033       CheckValidReg(root_reg.GetCode());
10034       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10035                 BakerReadBarrierSecondRegField::Decode(encoded_data));
10036       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
10037       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
10038       temps.Exclude(ip);
10039       vixl32::Label return_label, not_marked, forwarding_address;
10040       __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
10041       MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
10042       __ Ldr(ip, lock_word);
10043       __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
10044       __ B(eq, &not_marked);
10045       __ Bind(&return_label);
10046       __ Bx(lr);
10047       __ Bind(&not_marked);
10048       static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
10049                     "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
10050                     "the highest bits and the 'forwarding address' state to have all bits set");
10051       __ Cmp(ip, Operand(0xc0000000));
10052       __ B(hs, &forwarding_address);
10053       vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
10054       // Adjust the art_quick_read_barrier_mark_introspection address
10055       // in kBakerCcEntrypointRegister to one of
10056       //     art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},intrinsic_cas}.
10057       if (kind == BakerReadBarrierKind::kIntrinsicCas) {
10058         DCHECK(width == BakerReadBarrierWidth::kWide);
10059         DCHECK(!root_reg.IsLow());
10060       }
10061       int32_t entrypoint_offset =
10062           (kind == BakerReadBarrierKind::kGcRoot)
10063               ? (width == BakerReadBarrierWidth::kWide)
10064                   ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
10065                   : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
10066               : BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET;
10067       __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
10068       __ Mov(ip, root_reg);
10069       __ Bx(ep_reg);
10070       __ Bind(&forwarding_address);
10071       __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
10072       __ Bx(lr);
10073       break;
10074     }
10075     default:
10076       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
10077       UNREACHABLE();
10078   }
10079 
10080   // For JIT, the slow path is considered part of the compiled method,
10081   // so JIT should pass null as `debug_name`.
10082   DCHECK(!GetCompilerOptions().IsJitCompiler() || debug_name == nullptr);
10083   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
10084     std::ostringstream oss;
10085     oss << "BakerReadBarrierThunk";
10086     switch (kind) {
10087       case BakerReadBarrierKind::kField:
10088         oss << "Field";
10089         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10090           oss << "Wide";
10091         }
10092         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
10093             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
10094         break;
10095       case BakerReadBarrierKind::kArray:
10096         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10097         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10098                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10099         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10100         break;
10101       case BakerReadBarrierKind::kGcRoot:
10102         oss << "GcRoot";
10103         if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
10104           oss << "Wide";
10105         }
10106         oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10107         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10108                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10109         break;
10110       case BakerReadBarrierKind::kIntrinsicCas:
10111         oss << "IntrinsicCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
10112         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
10113                   BakerReadBarrierSecondRegField::Decode(encoded_data));
10114         DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
10115         break;
10116     }
10117     *debug_name = oss.str();
10118   }
10119 }
10120 
10121 #undef __
10122 
10123 }  // namespace arm
10124 }  // namespace art
10125