1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_arm_vixl.h"
18 
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "art_method.h"
21 #include "code_generator_arm_vixl.h"
22 #include "common_arm.h"
23 #include "lock_word.h"
24 #include "mirror/array-inl.h"
25 #include "mirror/object_array-inl.h"
26 #include "mirror/reference.h"
27 #include "mirror/string.h"
28 #include "scoped_thread_state_change-inl.h"
29 #include "thread-inl.h"
30 
31 #include "aarch32/constants-aarch32.h"
32 
33 namespace art {
34 namespace arm {
35 
36 #define __ assembler->GetVIXLAssembler()->
37 
38 using helpers::DRegisterFrom;
39 using helpers::HighRegisterFrom;
40 using helpers::InputDRegisterAt;
41 using helpers::InputRegisterAt;
42 using helpers::InputSRegisterAt;
43 using helpers::InputVRegisterAt;
44 using helpers::Int32ConstantFrom;
45 using helpers::LocationFrom;
46 using helpers::LowRegisterFrom;
47 using helpers::LowSRegisterFrom;
48 using helpers::HighSRegisterFrom;
49 using helpers::OutputDRegister;
50 using helpers::OutputSRegister;
51 using helpers::OutputRegister;
52 using helpers::OutputVRegister;
53 using helpers::RegisterFrom;
54 using helpers::SRegisterFrom;
55 using helpers::DRegisterFromS;
56 
57 using namespace vixl::aarch32;  // NOLINT(build/namespaces)
58 
59 using vixl::ExactAssemblyScope;
60 using vixl::CodeBufferCheckScope;
61 
62 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
63   return codegen_->GetAssembler();
64 }
65 
66 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
67   return codegen_->GetGraph()->GetArena();
68 }
69 
70 // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
71 // intrinsified call. This will copy the arguments into the positions for a regular call.
72 //
73 // Note: The actual parameters are required to be in the locations given by the invoke's location
74 //       summary. If an intrinsic modifies those locations before a slowpath call, they must be
75 //       restored!
76 //
77 // Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
78 //       sub-optimal (compared to a direct pointer call), but this is a slow-path.
79 
80 class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
81  public:
82   explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
83       : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
84 
85   Location MoveArguments(CodeGenerator* codegen) {
86     InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
87     IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
88     return calling_convention_visitor.GetMethodLocation();
89   }
90 
91   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
92     ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
93     __ Bind(GetEntryLabel());
94 
95     SaveLiveRegisters(codegen, invoke_->GetLocations());
96 
97     Location method_loc = MoveArguments(codegen);
98 
99     if (invoke_->IsInvokeStaticOrDirect()) {
100       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
101     } else {
102       codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
103     }
104     codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
105 
106     // Copy the result back to the expected output.
107     Location out = invoke_->GetLocations()->Out();
108     if (out.IsValid()) {
109       DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
110       DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
111       codegen->MoveFromReturnRegister(out, invoke_->GetType());
112     }
113 
114     RestoreLiveRegisters(codegen, invoke_->GetLocations());
115     __ B(GetExitLabel());
116   }
117 
118   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
119 
120  private:
121   // The instruction where this slow path is happening.
122   HInvoke* const invoke_;
123 
124   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
125 };
126 
127 // Compute base address for the System.arraycopy intrinsic in `base`.
128 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
129                                           Primitive::Type type,
130                                           const vixl32::Register& array,
131                                           const Location& pos,
132                                           const vixl32::Register& base) {
133   // This routine is only used by the SystemArrayCopy intrinsic at the
134   // moment. We could also allow Primitive::kPrimChar as `type` in order
135   // to implement the SystemArrayCopyChar intrinsic.
136   DCHECK_EQ(type, Primitive::kPrimNot);
137   const int32_t element_size = Primitive::ComponentSize(type);
138   const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
139   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
140 
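  // base = array + data_offset + pos * element_size. The constant case folds this into a
  // single ADD; the register case shifts `pos` left by the element size shift and then adds
  // the data offset.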
141   if (pos.IsConstant()) {
142     int32_t constant = Int32ConstantFrom(pos);
143     __ Add(base, array, element_size * constant + data_offset);
144   } else {
145     __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
146     __ Add(base, base, data_offset);
147   }
148 }
149 
150 // Compute end address for the System.arraycopy intrinsic in `end`.
151 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
152                                          Primitive::Type type,
153                                          const Location& copy_length,
154                                          const vixl32::Register& base,
155                                          const vixl32::Register& end) {
156   // This routine is only used by the SystemArrayCopy intrinsic at the
157   // moment. We could also allow Primitive::kPrimChar as `type` in order
158   // to implement the SystemArrayCopyChar intrinsic.
159   DCHECK_EQ(type, Primitive::kPrimNot);
160   const int32_t element_size = Primitive::ComponentSize(type);
161   const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
162 
163   if (copy_length.IsConstant()) {
164     int32_t constant = Int32ConstantFrom(copy_length);
165     __ Add(end, base, element_size * constant);
166   } else {
167     __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
168   }
169 }
170 
171 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
172 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
173  public:
174   explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
175       : SlowPathCodeARMVIXL(instruction) {
176     DCHECK(kEmitCompilerReadBarrier);
177     DCHECK(kUseBakerReadBarrier);
178   }
179 
180   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
181     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
182     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
183     LocationSummary* locations = instruction_->GetLocations();
184     DCHECK(locations->CanCall());
185     DCHECK(instruction_->IsInvokeStaticOrDirect())
186         << "Unexpected instruction in read barrier arraycopy slow path: "
187         << instruction_->DebugName();
188     DCHECK(instruction_->GetLocations()->Intrinsified());
189     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
190 
191     Primitive::Type type = Primitive::kPrimNot;
192     const int32_t element_size = Primitive::ComponentSize(type);
193 
194     vixl32::Register dest = InputRegisterAt(instruction_, 2);
195     Location dest_pos = locations->InAt(3);
196     vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
197     vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
198     vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
199     vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
200 
201     __ Bind(GetEntryLabel());
202     // Compute the base destination address in `dst_curr_addr`.
203     GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
204 
205     vixl32::Label loop;
206     __ Bind(&loop);
207     __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
208     assembler->MaybeUnpoisonHeapReference(tmp);
209     // TODO: Inline the mark bit check before calling the runtime?
210     // tmp = ReadBarrier::Mark(tmp);
211     // No need to save live registers; it's taken care of by the
212     // entrypoint. Also, there is no need to update the stack mask,
213     // as this runtime call will not trigger a garbage collection.
214     // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
215     // explanations.)
216     DCHECK(!tmp.IsSP());
217     DCHECK(!tmp.IsLR());
218     DCHECK(!tmp.IsPC());
219     // IP is used internally by the ReadBarrierMarkRegX entry point
220     // as a temporary (and not preserved).  It thus cannot be used by
221     // any live register in this slow path.
222     DCHECK(!src_curr_addr.Is(ip));
223     DCHECK(!dst_curr_addr.Is(ip));
224     DCHECK(!src_stop_addr.Is(ip));
225     DCHECK(!tmp.Is(ip));
226     DCHECK(tmp.IsRegister()) << tmp;
227     // TODO: Load the entrypoint once before the loop, instead of
228     // loading it at every iteration.
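    // The mark entrypoint is selected by register number: ReadBarrierMarkRegN expects the
    // reference in register N and returns the (possibly forwarded) reference in that same
    // register, hence the offset computed from tmp.GetCode() below.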
229     int32_t entry_point_offset =
230         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
231     // This runtime call does not require a stack map.
232     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
233     assembler->MaybePoisonHeapReference(tmp);
234     __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
235     __ Cmp(src_curr_addr, src_stop_addr);
236     __ B(ne, &loop, /* far_target */ false);
237     __ B(GetExitLabel());
238   }
239 
240   const char* GetDescription() const OVERRIDE {
241     return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
242   }
243 
244  private:
245   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
246 };
247 
248 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
249     : arena_(codegen->GetGraph()->GetArena()),
250       codegen_(codegen),
251       assembler_(codegen->GetAssembler()),
252       features_(codegen->GetInstructionSetFeatures()) {}
253 
254 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
255   Dispatch(invoke);
256   LocationSummary* res = invoke->GetLocations();
257   if (res == nullptr) {
258     return false;
259   }
260   return res->Intrinsified();
261 }
262 
263 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
264   LocationSummary* locations = new (arena) LocationSummary(invoke,
265                                                            LocationSummary::kNoCall,
266                                                            kIntrinsified);
267   locations->SetInAt(0, Location::RequiresFpuRegister());
268   locations->SetOut(Location::RequiresRegister());
269 }
270 
271 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
272   LocationSummary* locations = new (arena) LocationSummary(invoke,
273                                                            LocationSummary::kNoCall,
274                                                            kIntrinsified);
275   locations->SetInAt(0, Location::RequiresRegister());
276   locations->SetOut(Location::RequiresFpuRegister());
277 }
278 
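// These raw-bit moves implement Double.doubleToRawLongBits/longBitsToDouble and the float
// equivalents: VMOV between core and VFP registers copies the bit pattern unchanged.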
279 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
280   Location input = locations->InAt(0);
281   Location output = locations->Out();
282   if (is64bit) {
283     __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
284   } else {
285     __ Vmov(RegisterFrom(output), SRegisterFrom(input));
286   }
287 }
288 
289 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
290   Location input = locations->InAt(0);
291   Location output = locations->Out();
292   if (is64bit) {
293     __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
294   } else {
295     __ Vmov(SRegisterFrom(output), RegisterFrom(input));
296   }
297 }
298 
299 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
300   CreateFPToIntLocations(arena_, invoke);
301 }
302 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
303   CreateIntToFPLocations(arena_, invoke);
304 }
305 
306 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
307   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
308 }
309 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
310   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
311 }
312 
313 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
314   CreateFPToIntLocations(arena_, invoke);
315 }
316 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
317   CreateIntToFPLocations(arena_, invoke);
318 }
319 
320 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
321   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
322 }
323 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
324   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
325 }
326 
327 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
328   LocationSummary* locations = new (arena) LocationSummary(invoke,
329                                                            LocationSummary::kNoCall,
330                                                            kIntrinsified);
331   locations->SetInAt(0, Location::RequiresRegister());
332   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
333 }
334 
335 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
336   LocationSummary* locations = new (arena) LocationSummary(invoke,
337                                                            LocationSummary::kNoCall,
338                                                            kIntrinsified);
339   locations->SetInAt(0, Location::RequiresFpuRegister());
340   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
341 }
342 
343 static void GenNumberOfLeadingZeros(HInvoke* invoke,
344                                     Primitive::Type type,
345                                     CodeGeneratorARMVIXL* codegen) {
346   ArmVIXLAssembler* assembler = codegen->GetAssembler();
347   LocationSummary* locations = invoke->GetLocations();
348   Location in = locations->InAt(0);
349   vixl32::Register out = RegisterFrom(locations->Out());
350 
351   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
352 
353   if (type == Primitive::kPrimLong) {
354     vixl32::Register in_reg_lo = LowRegisterFrom(in);
355     vixl32::Register in_reg_hi = HighRegisterFrom(in);
356     vixl32::Label end;
357     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
358     __ Clz(out, in_reg_hi);
359     __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
360     __ Clz(out, in_reg_lo);
361     __ Add(out, out, 32);
362     if (end.IsReferenced()) {
363       __ Bind(&end);
364     }
365   } else {
366     __ Clz(out, RegisterFrom(in));
367   }
368 }
369 
370 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
371   CreateIntToIntLocations(arena_, invoke);
372 }
373 
374 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
375   GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
376 }
377 
378 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
379   LocationSummary* locations = new (arena_) LocationSummary(invoke,
380                                                            LocationSummary::kNoCall,
381                                                            kIntrinsified);
382   locations->SetInAt(0, Location::RequiresRegister());
383   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
384 }
385 
386 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
387   GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
388 }
389 
390 static void GenNumberOfTrailingZeros(HInvoke* invoke,
391                                      Primitive::Type type,
392                                      CodeGeneratorARMVIXL* codegen) {
393   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
394 
395   ArmVIXLAssembler* assembler = codegen->GetAssembler();
396   LocationSummary* locations = invoke->GetLocations();
397   vixl32::Register out = RegisterFrom(locations->Out());
398 
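  // ARM has no count-trailing-zeros instruction, so reverse the bits (RBIT) and count
  // leading zeros (CLZ) instead.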
399   if (type == Primitive::kPrimLong) {
400     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
401     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
402     vixl32::Label end;
403     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
404     __ Rbit(out, in_reg_lo);
405     __ Clz(out, out);
406     __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
407     __ Rbit(out, in_reg_hi);
408     __ Clz(out, out);
409     __ Add(out, out, 32);
410     if (end.IsReferenced()) {
411       __ Bind(&end);
412     }
413   } else {
414     vixl32::Register in = RegisterFrom(locations->InAt(0));
415     __ Rbit(out, in);
416     __ Clz(out, out);
417   }
418 }
419 
420 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
421   LocationSummary* locations = new (arena_) LocationSummary(invoke,
422                                                             LocationSummary::kNoCall,
423                                                             kIntrinsified);
424   locations->SetInAt(0, Location::RequiresRegister());
425   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
426 }
427 
428 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
429   GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
430 }
431 
432 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
433   LocationSummary* locations = new (arena_) LocationSummary(invoke,
434                                                             LocationSummary::kNoCall,
435                                                             kIntrinsified);
436   locations->SetInAt(0, Location::RequiresRegister());
437   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
438 }
439 
440 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
441   GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
442 }
443 
444 static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
445   __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
446 }
447 
448 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
449   CreateFPToFPLocations(arena_, invoke);
450 }
451 
452 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
453   MathAbsFP(invoke, GetAssembler());
454 }
455 
456 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
457   CreateFPToFPLocations(arena_, invoke);
458 }
459 
460 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
461   MathAbsFP(invoke, GetAssembler());
462 }
463 
464 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
465   LocationSummary* locations = new (arena) LocationSummary(invoke,
466                                                            LocationSummary::kNoCall,
467                                                            kIntrinsified);
468   locations->SetInAt(0, Location::RequiresRegister());
469   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
470 
471   locations->AddTemp(Location::RequiresRegister());
472 }
473 
474 static void GenAbsInteger(LocationSummary* locations,
475                           bool is64bit,
476                           ArmVIXLAssembler* assembler) {
477   Location in = locations->InAt(0);
478   Location output = locations->Out();
479 
480   vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
481 
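  // Branchless abs, i.e. the identity
  //   mask = value >> 31;                  // 0 for value >= 0, -1 for value < 0
  //   abs(value) = (value + mask) ^ mask;
  // The 64-bit path takes the mask from the high word and propagates the carry across the
  // register pair with ADDS/ADC.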
482   if (is64bit) {
483     vixl32::Register in_reg_lo = LowRegisterFrom(in);
484     vixl32::Register in_reg_hi = HighRegisterFrom(in);
485     vixl32::Register out_reg_lo = LowRegisterFrom(output);
486     vixl32::Register out_reg_hi = HighRegisterFrom(output);
487 
488     DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
489 
490     __ Asr(mask, in_reg_hi, 31);
491     __ Adds(out_reg_lo, in_reg_lo, mask);
492     __ Adc(out_reg_hi, in_reg_hi, mask);
493     __ Eor(out_reg_lo, mask, out_reg_lo);
494     __ Eor(out_reg_hi, mask, out_reg_hi);
495   } else {
496     vixl32::Register in_reg = RegisterFrom(in);
497     vixl32::Register out_reg = RegisterFrom(output);
498 
499     __ Asr(mask, in_reg, 31);
500     __ Add(out_reg, in_reg, mask);
501     __ Eor(out_reg, mask, out_reg);
502   }
503 }
504 
505 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
506   CreateIntToIntPlusTemp(arena_, invoke);
507 }
508 
509 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
510   GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
511 }
512 
513 
514 void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
515   CreateIntToIntPlusTemp(arena_, invoke);
516 }
517 
518 void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
519   GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
520 }
521 
522 static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
523   ArmVIXLAssembler* assembler = codegen->GetAssembler();
524   Location op1_loc = invoke->GetLocations()->InAt(0);
525   Location op2_loc = invoke->GetLocations()->InAt(1);
526   Location out_loc = invoke->GetLocations()->Out();
527 
528   // Optimization: don't generate any code if inputs are the same.
529   if (op1_loc.Equals(op2_loc)) {
530     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
531     return;
532   }
533 
534   vixl32::SRegister op1 = SRegisterFrom(op1_loc);
535   vixl32::SRegister op2 = SRegisterFrom(op2_loc);
536   vixl32::SRegister out = OutputSRegister(invoke);
537   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
538   const vixl32::Register temp1 = temps.Acquire();
539   vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
540   vixl32::Label nan, done;
541   vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
542 
543   DCHECK(op1.Is(out));
544 
545   __ Vcmp(op1, op2);
546   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
547   __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.
548 
549   // op1 <> op2
550   vixl32::ConditionType cond = is_min ? gt : lt;
551   {
552     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
553                                 2 * kMaxInstructionSizeInBytes,
554                                 CodeBufferCheckScope::kMaximumSize);
555     __ it(cond);
556     __ vmov(cond, F32, out, op2);
557   }
558   // For <> (not equal), the min/max calculation is already done.
559   __ B(ne, final_label, /* far_target */ false);
560 
561   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
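  // +0.0 and -0.0 compare equal, so resolve the sign through the raw bits: ORing the bit
  // patterns keeps a set sign bit (min yields -0.0), while ANDing clears it (max yields +0.0).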
562   __ Vmov(temp1, op1);
563   __ Vmov(temp2, op2);
564   if (is_min) {
565     __ Orr(temp1, temp1, temp2);
566   } else {
567     __ And(temp1, temp1, temp2);
568   }
569   __ Vmov(out, temp1);
570   __ B(final_label);
571 
572   // handle NaN input.
573   __ Bind(&nan);
574   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
575   __ Vmov(out, temp1);
576 
577   if (done.IsReferenced()) {
578     __ Bind(&done);
579   }
580 }
581 
582 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
583   LocationSummary* locations = new (arena) LocationSummary(invoke,
584                                                            LocationSummary::kNoCall,
585                                                            kIntrinsified);
586   locations->SetInAt(0, Location::RequiresFpuRegister());
587   locations->SetInAt(1, Location::RequiresFpuRegister());
588   locations->SetOut(Location::SameAsFirstInput());
589 }
590 
591 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
592   CreateFPFPToFPLocations(arena_, invoke);
593   invoke->GetLocations()->AddTemp(Location::RequiresRegister());
594 }
595 
596 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
597   GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
598 }
599 
600 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
601   CreateFPFPToFPLocations(arena_, invoke);
602   invoke->GetLocations()->AddTemp(Location::RequiresRegister());
603 }
604 
605 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
606   GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
607 }
608 
609 static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
610   ArmVIXLAssembler* assembler = codegen->GetAssembler();
611   Location op1_loc = invoke->GetLocations()->InAt(0);
612   Location op2_loc = invoke->GetLocations()->InAt(1);
613   Location out_loc = invoke->GetLocations()->Out();
614 
615   // Optimization: don't generate any code if inputs are the same.
616   if (op1_loc.Equals(op2_loc)) {
617     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
618     return;
619   }
620 
621   vixl32::DRegister op1 = DRegisterFrom(op1_loc);
622   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
623   vixl32::DRegister out = OutputDRegister(invoke);
624   vixl32::Label handle_nan_eq, done;
625   vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
626 
627   DCHECK(op1.Is(out));
628 
629   __ Vcmp(op1, op2);
630   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
631   __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.
632 
633   // op1 <> op2
634   vixl32::ConditionType cond = is_min ? gt : lt;
635   {
636     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
637                                 2 * kMaxInstructionSizeInBytes,
638                                 CodeBufferCheckScope::kMaximumSize);
639     __ it(cond);
640     __ vmov(cond, F64, out, op2);
641   }
642   // For <> (not equal), the min/max calculation is already done.
643   __ B(ne, final_label, /* far_target */ false);
644 
645   // handle op1 == op2, max(+0.0,-0.0).
646   if (!is_min) {
647     __ Vand(F64, out, op1, op2);
648     __ B(final_label);
649   }
650 
651   // handle op1 == op2, min(+0.0,-0.0), NaN input.
652   __ Bind(&handle_nan_eq);
653   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
654 
655   if (done.IsReferenced()) {
656     __ Bind(&done);
657   }
658 }
659 
660 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
661   CreateFPFPToFPLocations(arena_, invoke);
662 }
663 
664 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
665   GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
666 }
667 
668 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
669   CreateFPFPToFPLocations(arena_, invoke);
670 }
671 
672 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
673   GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
674 }
675 
676 static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
677   Location op1_loc = invoke->GetLocations()->InAt(0);
678   Location op2_loc = invoke->GetLocations()->InAt(1);
679   Location out_loc = invoke->GetLocations()->Out();
680 
681   // Optimization: don't generate any code if inputs are the same.
682   if (op1_loc.Equals(op2_loc)) {
683     DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
684     return;
685   }
686 
687   vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
688   vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
689   vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
690   vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
691   vixl32::Register out_lo = LowRegisterFrom(out_loc);
692   vixl32::Register out_hi = HighRegisterFrom(out_loc);
693   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
694   const vixl32::Register temp = temps.Acquire();
695 
696   DCHECK(op1_lo.Is(out_lo));
697   DCHECK(op1_hi.Is(out_hi));
698 
699   // Compare op1 >= op2, or op1 < op2.
700   __ Cmp(out_lo, op2_lo);
701   __ Sbcs(temp, out_hi, op2_hi);
702 
703   // Now GE/LT condition code is correct for the long comparison.
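  // (Only the signed GE/LT flags are valid for the full 64-bit comparison here; the Z flag
  // only reflects the high-word subtraction.)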
704   {
705     vixl32::ConditionType cond = is_min ? ge : lt;
706     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
707                                 3 * kMaxInstructionSizeInBytes,
708                                 CodeBufferCheckScope::kMaximumSize);
709     __ itt(cond);
710     __ mov(cond, out_lo, op2_lo);
711     __ mov(cond, out_hi, op2_hi);
712   }
713 }
714 
715 static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
716   LocationSummary* locations = new (arena) LocationSummary(invoke,
717                                                            LocationSummary::kNoCall,
718                                                            kIntrinsified);
719   locations->SetInAt(0, Location::RequiresRegister());
720   locations->SetInAt(1, Location::RequiresRegister());
721   locations->SetOut(Location::SameAsFirstInput());
722 }
723 
724 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
725   CreateLongLongToLongLocations(arena_, invoke);
726 }
727 
728 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
729   GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
730 }
731 
732 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
733   CreateLongLongToLongLocations(arena_, invoke);
734 }
735 
736 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
737   GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
738 }
739 
740 static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
741   vixl32::Register op1 = InputRegisterAt(invoke, 0);
742   vixl32::Register op2 = InputRegisterAt(invoke, 1);
743   vixl32::Register out = OutputRegister(invoke);
744 
745   __ Cmp(op1, op2);
746 
747   {
748     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
749                            3 * kMaxInstructionSizeInBytes,
750                            CodeBufferCheckScope::kMaximumSize);
751 
752     __ ite(is_min ? lt : gt);
753     __ mov(is_min ? lt : gt, out, op1);
754     __ mov(is_min ? ge : le, out, op2);
755   }
756 }
757 
758 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
759   LocationSummary* locations = new (arena) LocationSummary(invoke,
760                                                            LocationSummary::kNoCall,
761                                                            kIntrinsified);
762   locations->SetInAt(0, Location::RequiresRegister());
763   locations->SetInAt(1, Location::RequiresRegister());
764   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
765 }
766 
767 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
768   CreateIntIntToIntLocations(arena_, invoke);
769 }
770 
771 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
772   GenMinMax(invoke, /* is_min */ true, GetAssembler());
773 }
774 
775 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
776   CreateIntIntToIntLocations(arena_, invoke);
777 }
778 
779 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
780   GenMinMax(invoke, /* is_min */ false, GetAssembler());
781 }
782 
783 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
784   CreateFPToFPLocations(arena_, invoke);
785 }
786 
787 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
788   ArmVIXLAssembler* assembler = GetAssembler();
789   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
790 }
791 
792 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
793   if (features_.HasARMv8AInstructions()) {
794     CreateFPToFPLocations(arena_, invoke);
795   }
796 }
797 
798 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
799   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
800   ArmVIXLAssembler* assembler = GetAssembler();
801   __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
802 }
803 
804 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
805   if (features_.HasARMv8AInstructions()) {
806     LocationSummary* locations = new (arena_) LocationSummary(invoke,
807                                                               LocationSummary::kNoCall,
808                                                               kIntrinsified);
809     locations->SetInAt(0, Location::RequiresFpuRegister());
810     locations->SetOut(Location::RequiresRegister());
811     locations->AddTemp(Location::RequiresFpuRegister());
812   }
813 }
814 
815 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
816   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
817 
818   ArmVIXLAssembler* assembler = GetAssembler();
819   vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
820   vixl32::Register out_reg = OutputRegister(invoke);
821   vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
822   vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
823   vixl32::Label done;
824   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
825 
826   // Round to nearest integer, ties away from zero.
827   __ Vcvta(S32, F32, temp1, in_reg);
828   __ Vmov(out_reg, temp1);
829 
830   // For positive, zero or NaN inputs, rounding is done.
831   __ Cmp(out_reg, 0);
832   __ B(ge, final_label, /* far_target */ false);
833 
834   // Handle input < 0 cases.
835   // If input is negative but not a tie, previous result (round to nearest) is valid.
836   // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
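  // Example: for in_reg = -2.5f, Vcvta above produced -3; -2.5 - Vrinta(-2.5) == 0.5 flags
  // the tie and the conditional add below corrects the result to -2.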
837   __ Vrinta(F32, F32, temp1, in_reg);
838   __ Vmov(temp2, 0.5);
839   __ Vsub(F32, temp1, in_reg, temp1);
840   __ Vcmp(F32, temp1, temp2);
841   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
842   {
843     // Use ExactAssemblyScope here because we are using IT.
844     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
845                                 2 * kMaxInstructionSizeInBytes,
846                                 CodeBufferCheckScope::kMaximumSize);
847     __ it(eq);
848     __ add(eq, out_reg, out_reg, 1);
849   }
850 
851   if (done.IsReferenced()) {
852     __ Bind(&done);
853   }
854 }
855 
856 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
857   CreateIntToIntLocations(arena_, invoke);
858 }
859 
860 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
861   ArmVIXLAssembler* assembler = GetAssembler();
862   // Ignore upper 4B of long address.
863   __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
864 }
865 
866 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
867   CreateIntToIntLocations(arena_, invoke);
868 }
869 
870 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
871   ArmVIXLAssembler* assembler = GetAssembler();
872   // Ignore upper 4B of long address.
873   __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
874 }
875 
876 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
877   CreateIntToIntLocations(arena_, invoke);
878 }
879 
880 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
881   ArmVIXLAssembler* assembler = GetAssembler();
882   // Ignore upper 4B of long address.
883   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
884   // LDRD requires a word-aligned address (and with SCTLR.A = 1 any unaligned access faults),
885   // so we can't use ldrd as addr may be unaligned; load the two words separately instead.
886   vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
887   vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
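  // If addr aliases the low output register, load the high word first so the address is not
  // clobbered before the second load.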
888   if (addr.Is(lo)) {
889     __ Ldr(hi, MemOperand(addr, 4));
890     __ Ldr(lo, MemOperand(addr));
891   } else {
892     __ Ldr(lo, MemOperand(addr));
893     __ Ldr(hi, MemOperand(addr, 4));
894   }
895 }
896 
897 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
898   CreateIntToIntLocations(arena_, invoke);
899 }
900 
901 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
902   ArmVIXLAssembler* assembler = GetAssembler();
903   // Ignore upper 4B of long address.
904   __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
905 }
906 
907 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
908   LocationSummary* locations = new (arena) LocationSummary(invoke,
909                                                            LocationSummary::kNoCall,
910                                                            kIntrinsified);
911   locations->SetInAt(0, Location::RequiresRegister());
912   locations->SetInAt(1, Location::RequiresRegister());
913 }
914 
915 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
916   CreateIntIntToVoidLocations(arena_, invoke);
917 }
918 
919 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
920   ArmVIXLAssembler* assembler = GetAssembler();
921   __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
922 }
923 
924 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
925   CreateIntIntToVoidLocations(arena_, invoke);
926 }
927 
928 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
929   ArmVIXLAssembler* assembler = GetAssembler();
930   __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
931 }
932 
933 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
934   CreateIntIntToVoidLocations(arena_, invoke);
935 }
936 
937 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
938   ArmVIXLAssembler* assembler = GetAssembler();
939   // Ignore upper 4B of long address.
940   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
941   // STRD requires a word-aligned address (and with SCTLR.A = 1 any unaligned access faults),
942   // so we can't use strd as addr may be unaligned; store the two words separately instead.
943   __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
944   __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
945 }
946 
947 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
948   CreateIntIntToVoidLocations(arena_, invoke);
949 }
950 
951 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
952   ArmVIXLAssembler* assembler = GetAssembler();
953   __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
954 }
955 
956 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
957   LocationSummary* locations = new (arena_) LocationSummary(invoke,
958                                                             LocationSummary::kNoCall,
959                                                             kIntrinsified);
960   locations->SetOut(Location::RequiresRegister());
961 }
962 
963 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
964   ArmVIXLAssembler* assembler = GetAssembler();
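  // The java.lang.Thread peer is cached in the runtime Thread object; load it from the
  // dedicated thread register (tr) at PeerOffset.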
965   __ Ldr(OutputRegister(invoke),
966          MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
967 }
968 
969 static void GenUnsafeGet(HInvoke* invoke,
970                          Primitive::Type type,
971                          bool is_volatile,
972                          CodeGeneratorARMVIXL* codegen) {
973   LocationSummary* locations = invoke->GetLocations();
974   ArmVIXLAssembler* assembler = codegen->GetAssembler();
975   Location base_loc = locations->InAt(1);
976   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
977   Location offset_loc = locations->InAt(2);
978   vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
979   Location trg_loc = locations->Out();
980 
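  // For volatile gets, the DMB ISH issued after each load provides the required acquire
  // ordering.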
981   switch (type) {
982     case Primitive::kPrimInt: {
983       vixl32::Register trg = RegisterFrom(trg_loc);
984       __ Ldr(trg, MemOperand(base, offset));
985       if (is_volatile) {
986         __ Dmb(vixl32::ISH);
987       }
988       break;
989     }
990 
991     case Primitive::kPrimNot: {
992       vixl32::Register trg = RegisterFrom(trg_loc);
993       if (kEmitCompilerReadBarrier) {
994         if (kUseBakerReadBarrier) {
995           Location temp = locations->GetTemp(0);
996           codegen->GenerateReferenceLoadWithBakerReadBarrier(
997               invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
998           if (is_volatile) {
999             __ Dmb(vixl32::ISH);
1000           }
1001         } else {
1002           __ Ldr(trg, MemOperand(base, offset));
1003           if (is_volatile) {
1004             __ Dmb(vixl32::ISH);
1005           }
1006           codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
1007         }
1008       } else {
1009         __ Ldr(trg, MemOperand(base, offset));
1010         if (is_volatile) {
1011           __ Dmb(vixl32::ISH);
1012         }
1013         assembler->MaybeUnpoisonHeapReference(trg);
1014       }
1015       break;
1016     }
1017 
1018     case Primitive::kPrimLong: {
1019       vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
1020       vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
1021       if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
1022         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1023         const vixl32::Register temp_reg = temps.Acquire();
1024         __ Add(temp_reg, base, offset);
1025         __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
1026       } else {
1027         __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
1028       }
1029       if (is_volatile) {
1030         __ Dmb(vixl32::ISH);
1031       }
1032       break;
1033     }
1034 
1035     default:
1036       LOG(FATAL) << "Unexpected type " << type;
1037       UNREACHABLE();
1038   }
1039 }
1040 
1041 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1042                                           HInvoke* invoke,
1043                                           Primitive::Type type) {
1044   bool can_call = kEmitCompilerReadBarrier &&
1045       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1046        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1047   LocationSummary* locations = new (arena) LocationSummary(invoke,
1048                                                            (can_call
1049                                                                 ? LocationSummary::kCallOnSlowPath
1050                                                                 : LocationSummary::kNoCall),
1051                                                            kIntrinsified);
1052   if (can_call && kUseBakerReadBarrier) {
1053     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1054   }
1055   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1056   locations->SetInAt(1, Location::RequiresRegister());
1057   locations->SetInAt(2, Location::RequiresRegister());
1058   locations->SetOut(Location::RequiresRegister(),
1059                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1060   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1061     // We need a temporary register for the read barrier marking slow
1062     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
1063     locations->AddTemp(Location::RequiresRegister());
1064   }
1065 }
1066 
1067 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
1068   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
1069 }
1070 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
1071   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
1072 }
1073 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
1074   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
1075 }
1076 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1077   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
1078 }
1079 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
1080   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
1081 }
1082 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1083   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
1084 }
1085 
1086 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
1087   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
1088 }
1089 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
1090   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
1091 }
1092 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
1093   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
1094 }
1095 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1096   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
1097 }
1098 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
1099   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
1100 }
1101 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1102   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
1103 }
1104 
1105 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
1106                                      const ArmInstructionSetFeatures& features,
1107                                      Primitive::Type type,
1108                                      bool is_volatile,
1109                                      HInvoke* invoke) {
1110   LocationSummary* locations = new (arena) LocationSummary(invoke,
1111                                                            LocationSummary::kNoCall,
1112                                                            kIntrinsified);
1113   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1114   locations->SetInAt(1, Location::RequiresRegister());
1115   locations->SetInAt(2, Location::RequiresRegister());
1116   locations->SetInAt(3, Location::RequiresRegister());
1117 
1118   if (type == Primitive::kPrimLong) {
1119     // Potentially need temps for ldrexd-strexd loop.
1120     if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
1121       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
1122       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
1123     }
1124   } else if (type == Primitive::kPrimNot) {
1125     // Temps for card-marking.
1126     locations->AddTemp(Location::RequiresRegister());  // Temp.
1127     locations->AddTemp(Location::RequiresRegister());  // Card.
1128   }
1129 }
1130 
1131 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
1132   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
1133 }
1134 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
1135   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
1136 }
1137 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
1138   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
1139 }
1140 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
1141   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
1142 }
1143 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1144   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
1145 }
1146 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1147   CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
1148 }
1149 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
1150   CreateIntIntIntIntToVoid(
1151       arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
1152 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1153 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1154   CreateIntIntIntIntToVoid(
1155       arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
1156 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1157 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1158   CreateIntIntIntIntToVoid(
1159       arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
1160 }
1161 
GenUnsafePut(LocationSummary * locations,Primitive::Type type,bool is_volatile,bool is_ordered,CodeGeneratorARMVIXL * codegen)1162 static void GenUnsafePut(LocationSummary* locations,
1163                          Primitive::Type type,
1164                          bool is_volatile,
1165                          bool is_ordered,
1166                          CodeGeneratorARMVIXL* codegen) {
1167   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1168 
1169   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
1170   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
1171   vixl32::Register value;
1172 
1173   if (is_volatile || is_ordered) {
1174     __ Dmb(vixl32::ISH);
1175   }
1176 
1177   if (type == Primitive::kPrimLong) {
1178     vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
1179     vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
1180     value = value_lo;
1181     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
1182       vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
1183       vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
1184       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1185       const vixl32::Register temp_reg = temps.Acquire();
1186 
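      // Illustrative shape of the retry loop emitted below (a sketch, not exact
      // encodings): the LDREXD/STREXD exclusive pair makes the 64-bit store
      // single-copy atomic even where a plain STRD would not be, i.e. when
      // HasAtomicLdrdAndStrd() is false:
      //
      //   add    temp, base, offset
      //   loop:  ldrexd temp_lo, temp_hi, [temp]
      //          strexd temp_lo, value_lo, value_hi, [temp]   @ temp_lo = 0 on success
      //          cmp    temp_lo, #0
      //          bne    loop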
1187       __ Add(temp_reg, base, offset);
1188       vixl32::Label loop_head;
1189       __ Bind(&loop_head);
1190       __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
1191       __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
1192       __ Cmp(temp_lo, 0);
1193       __ B(ne, &loop_head, /* far_target */ false);
1194     } else {
1195       __ Strd(value_lo, value_hi, MemOperand(base, offset));
1196     }
1197   } else {
1198     value = RegisterFrom(locations->InAt(3));
1199     vixl32::Register source = value;
1200     if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1201       vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1202       __ Mov(temp, value);
1203       assembler->PoisonHeapReference(temp);
1204       source = temp;
1205     }
1206     __ Str(source, MemOperand(base, offset));
1207   }
1208 
1209   if (is_volatile) {
1210     __ Dmb(vixl32::ISH);
1211   }
1212 
1213   if (type == Primitive::kPrimNot) {
1214     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1215     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
1216     bool value_can_be_null = true;  // TODO: Worth finding out this information?
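    // MarkGCCard emits the generational write barrier: conceptually it dirties the
    // card table entry covering `base` (roughly card_table[base >> kCardShift] = dirty)
    // so the GC will re-scan this object for the reference just stored; with
    // `value_can_be_null` it also skips the marking for a null value. The exact card
    // layout is an implementation detail of gc::accounting::CardTable, only sketched here.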
1217     codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
1218   }
1219 }
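// Rough mapping from the barrier placement in GenUnsafePut above to the Java-level
// operations it implements (a sketch of intent, not a formal memory-model statement):
//
//   put*          ->              str
//   putOrdered*   ->  dmb ish;    str                 // release-style store
//   putVolatile*  ->  dmb ish;    str;     dmb ish    // full ordering around the store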
1220 
VisitUnsafePut(HInvoke * invoke)1221 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
1222   GenUnsafePut(invoke->GetLocations(),
1223                Primitive::kPrimInt,
1224                /* is_volatile */ false,
1225                /* is_ordered */ false,
1226                codegen_);
1227 }
VisitUnsafePutOrdered(HInvoke * invoke)1228 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
1229   GenUnsafePut(invoke->GetLocations(),
1230                Primitive::kPrimInt,
1231                /* is_volatile */ false,
1232                /* is_ordered */ true,
1233                codegen_);
1234 }
VisitUnsafePutVolatile(HInvoke * invoke)1235 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
1236   GenUnsafePut(invoke->GetLocations(),
1237                Primitive::kPrimInt,
1238                /* is_volatile */ true,
1239                /* is_ordered */ false,
1240                codegen_);
1241 }
VisitUnsafePutObject(HInvoke * invoke)1242 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
1243   GenUnsafePut(invoke->GetLocations(),
1244                Primitive::kPrimNot,
1245                /* is_volatile */ false,
1246                /* is_ordered */ false,
1247                codegen_);
1248 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1249 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1250   GenUnsafePut(invoke->GetLocations(),
1251                Primitive::kPrimNot,
1252                /* is_volatile */ false,
1253                /* is_ordered */ true,
1254                codegen_);
1255 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1256 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1257   GenUnsafePut(invoke->GetLocations(),
1258                Primitive::kPrimNot,
1259                /* is_volatile */ true,
1260                /* is_ordered */ false,
1261                codegen_);
1262 }
VisitUnsafePutLong(HInvoke * invoke)1263 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
1264   GenUnsafePut(invoke->GetLocations(),
1265                Primitive::kPrimLong,
1266                /* is_volatile */ false,
1267                /* is_ordered */ false,
1268                codegen_);
1269 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1270 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1271   GenUnsafePut(invoke->GetLocations(),
1272                Primitive::kPrimLong,
1273                /* is_volatile */ false,
1274                /* is_ordered */ true,
1275                codegen_);
1276 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1277 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1278   GenUnsafePut(invoke->GetLocations(),
1279                Primitive::kPrimLong,
1280                /* is_volatile */ true,
1281                /* is_ordered */ false,
1282                codegen_);
1283 }
1284 
CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator * arena,HInvoke * invoke,Primitive::Type type)1285 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
1286                                                 HInvoke* invoke,
1287                                                 Primitive::Type type) {
1288   bool can_call = kEmitCompilerReadBarrier &&
1289       kUseBakerReadBarrier &&
1290       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1291   LocationSummary* locations = new (arena) LocationSummary(invoke,
1292                                                            (can_call
1293                                                                 ? LocationSummary::kCallOnSlowPath
1294                                                                 : LocationSummary::kNoCall),
1295                                                            kIntrinsified);
1296   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1297   locations->SetInAt(1, Location::RequiresRegister());
1298   locations->SetInAt(2, Location::RequiresRegister());
1299   locations->SetInAt(3, Location::RequiresRegister());
1300   locations->SetInAt(4, Location::RequiresRegister());
1301 
1302   // If heap poisoning is enabled, we don't want the unpoisoning
1303   // operations to potentially clobber the output. Likewise when
1304   // emitting a (Baker) read barrier, which may make a runtime call.
1305   Location::OutputOverlap overlaps =
1306       ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
1307       ? Location::kOutputOverlap
1308       : Location::kNoOutputOverlap;
1309   locations->SetOut(Location::RequiresRegister(), overlaps);
1310 
1311   // Temporary registers used in CAS. In the object case
1312   // (UnsafeCASObject intrinsic), these are also used for
1313   // card-marking, and possibly for the (Baker) read barrier.
1314   locations->AddTemp(Location::RequiresRegister());  // Pointer.
1315   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
1316 }
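// For reference, the Java-level entry points these locations serve look roughly like:
//
//   boolean Unsafe.compareAndSwapInt(Object obj, long offset, int expected, int x)
//   boolean Unsafe.compareAndSwapObject(Object obj, long offset, Object expected, Object x)
//
// which is where the five inputs in the helper's name come from: the unused Unsafe
// receiver, the object, the (64-bit) offset, the expected value and the new value.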
1317 
GenCas(HInvoke * invoke,Primitive::Type type,CodeGeneratorARMVIXL * codegen)1318 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
1319   DCHECK_NE(type, Primitive::kPrimLong);
1320 
1321   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1322   LocationSummary* locations = invoke->GetLocations();
1323 
1324   Location out_loc = locations->Out();
1325   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
1326 
1327   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
1328   Location offset_loc = locations->InAt(2);
1329   vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
1330   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
1331   vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
1332 
1333   Location tmp_ptr_loc = locations->GetTemp(0);
1334   vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
1335   vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
1336 
1337   if (type == Primitive::kPrimNot) {
1338     // The only read barrier implementation supporting the
1339     // UnsafeCASObject intrinsic is the Baker-style read barriers.
1340     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1341 
1342     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
1343     // object and scan the receiver at the next GC for nothing.
1344     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1345     codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1346 
1347     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1348       // Need to make sure the reference stored in the field is a to-space
1349       // one before attempting the CAS or the CAS could fail incorrectly.
1350       codegen->GenerateReferenceLoadWithBakerReadBarrier(
1351           invoke,
1352           out_loc,  // Unused, used only as a "temporary" within the read barrier.
1353           base,
1354           /* offset */ 0u,
1355           /* index */ offset_loc,
1356           ScaleFactor::TIMES_1,
1357           tmp_ptr_loc,
1358           /* needs_null_check */ false,
1359           /* always_update_field */ true,
1360           &tmp);
1361     }
1362   }
1363 
1364   // Prevent reordering with prior memory operations.
1365   // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
1366   // latter allows a preceding load to be delayed past the STREX
1367   // instruction below.
1368   __ Dmb(vixl32::ISH);
1369 
1370   __ Add(tmp_ptr, base, offset);
1371 
1372   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1373     codegen->GetAssembler()->PoisonHeapReference(expected);
1374     if (value.Is(expected)) {
1375       // Do not poison `value`, as it is the same register as
1376       // `expected`, which has just been poisoned.
1377     } else {
1378       codegen->GetAssembler()->PoisonHeapReference(value);
1379     }
1380   }
1381 
1382   // do {
1383   //   tmp = [r_ptr] - expected;
1384   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1385   // result = (tmp == 0);
1386 
1387   vixl32::Label loop_head;
1388   __ Bind(&loop_head);
1389 
1390   __ Ldrex(tmp, MemOperand(tmp_ptr));
1391 
1392   __ Subs(tmp, tmp, expected);
1393 
1394   {
1395     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1396                            3 * kMaxInstructionSizeInBytes,
1397                            CodeBufferCheckScope::kMaximumSize);
1398 
1399     __ itt(eq);
1400     __ strex(eq, tmp, value, MemOperand(tmp_ptr));
1401     __ cmp(eq, tmp, 1);
1402   }
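  // The ITT block above makes both the STREX and the CMP conditional on the SUBS
  // having produced zero (the loaded value matched `expected`); on a mismatch both
  // are skipped, the flags from the SUBS fall through unchanged, the branch below
  // is not taken, and we exit the loop with a non-zero `tmp`.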
1403 
1404   __ B(eq, &loop_head, /* far_target */ false);
1405 
1406   __ Dmb(vixl32::ISH);
1407 
1408   __ Rsbs(out, tmp, 1);
1409 
1410   {
1411     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1412                            2 * kMaxInstructionSizeInBytes,
1413                            CodeBufferCheckScope::kMaximumSize);
1414 
1415     __ it(cc);
1416     __ mov(cc, out, 0);
1417   }
1418 
1419   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1420     codegen->GetAssembler()->UnpoisonHeapReference(expected);
1421     if (value.Is(expected)) {
1422       // Do not unpoison `value`, as it is the same register as
1423       // `expected`, which has just been unpoisoned.
1424     } else {
1425       codegen->GetAssembler()->UnpoisonHeapReference(value);
1426     }
1427   }
1428 }
1429 
VisitUnsafeCASInt(HInvoke * invoke)1430 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1431   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
1432 }
VisitUnsafeCASObject(HInvoke * invoke)1433 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1434   // The only read barrier implementation supporting the
1435   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1436   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1437     return;
1438   }
1439 
1440   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
1441 }
VisitUnsafeCASInt(HInvoke * invoke)1442 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1443   GenCas(invoke, Primitive::kPrimInt, codegen_);
1444 }
VisitUnsafeCASObject(HInvoke * invoke)1445 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1446   // The only read barrier implementation supporting the
1447   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1448   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1449 
1450   GenCas(invoke, Primitive::kPrimNot, codegen_);
1451 }
1452 
VisitStringCompareTo(HInvoke * invoke)1453 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1454   // The inputs plus temporaries for the comparison.
1455   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1456                                                             invoke->InputAt(1)->CanBeNull()
1457                                                                 ? LocationSummary::kCallOnSlowPath
1458                                                                 : LocationSummary::kNoCall,
1459                                                             kIntrinsified);
1460   locations->SetInAt(0, Location::RequiresRegister());
1461   locations->SetInAt(1, Location::RequiresRegister());
1462   locations->AddTemp(Location::RequiresRegister());
1463   locations->AddTemp(Location::RequiresRegister());
1464   locations->AddTemp(Location::RequiresRegister());
1465   // Need an extra temporary register for the String compression feature.
1466   if (mirror::kUseStringCompression) {
1467     locations->AddTemp(Location::RequiresRegister());
1468   }
1469   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1470 }
1471 
VisitStringCompareTo(HInvoke * invoke)1472 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1473   ArmVIXLAssembler* assembler = GetAssembler();
1474   LocationSummary* locations = invoke->GetLocations();
1475 
1476   vixl32::Register str = InputRegisterAt(invoke, 0);
1477   vixl32::Register arg = InputRegisterAt(invoke, 1);
1478   vixl32::Register out = OutputRegister(invoke);
1479 
1480   vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1481   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1482   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1483   vixl32::Register temp3;
1484   if (mirror::kUseStringCompression) {
1485     temp3 = RegisterFrom(locations->GetTemp(3));
1486   }
1487 
1488   vixl32::Label loop;
1489   vixl32::Label find_char_diff;
1490   vixl32::Label end;
1491   vixl32::Label different_compression;
1492 
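  // Informal sketch of the contract implemented below (String.compareTo):
  //
  //   int result = str.length() - arg.length();
  //   for (int i = 0; i < Math.min(str.length(), arg.length()); ++i) {
  //     if (str.charAt(i) != arg.charAt(i)) {
  //       result = str.charAt(i) - arg.charAt(i);
  //       break;
  //     }
  //   }
  //
  // realized with a 4-characters-per-iteration unrolled loop and a separate slow
  // loop for the case where the two strings use different compression.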
1493   // Get offsets of count and value fields within a string object.
1494   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1495   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1496 
1497   // Note that the null check must have been done earlier.
1498   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1499 
1500   // Take the slow path and throw if the input can be null and is null.
1501   SlowPathCodeARMVIXL* slow_path = nullptr;
1502   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1503   if (can_slow_path) {
1504     slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1505     codegen_->AddSlowPath(slow_path);
1506     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1507   }
1508 
1509   // Reference equality check, return 0 if same reference.
1510   __ Subs(out, str, arg);
1511   __ B(eq, &end);
1512 
1513   if (mirror::kUseStringCompression) {
1514     // Load `count` fields of this and argument strings.
1515     __ Ldr(temp3, MemOperand(str, count_offset));
1516     __ Ldr(temp2, MemOperand(arg, count_offset));
1517     // Extract lengths from the `count` fields.
1518     __ Lsr(temp0, temp3, 1u);
1519     __ Lsr(temp1, temp2, 1u);
1520   } else {
1521     // Load lengths of this and argument strings.
1522     __ Ldr(temp0, MemOperand(str, count_offset));
1523     __ Ldr(temp1, MemOperand(arg, count_offset));
1524   }
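  // Note on the `count` encoding assumed above (a sketch of the compressed-string
  // layout): the field packs the length and the compression flag, roughly
  //
  //   count = (length << 1) | (compressed ? 0 : 1)   // see the static_assert below
  //
  // so `count >> 1` yields the character count for either representation and bit 0
  // distinguishes 8-bit (compressed) from 16-bit (uncompressed) data.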
1525   // out = length diff.
1526   __ Subs(out, temp0, temp1);
1527   // temp0 = min(len(str), len(arg)).
1528 
1529   {
1530     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1531                            2 * kMaxInstructionSizeInBytes,
1532                            CodeBufferCheckScope::kMaximumSize);
1533 
1534     __ it(gt);
1535     __ mov(gt, temp0, temp1);
1536   }
1537 
1538   // Shorter string is empty?
1539   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1540   // which makes the &end label too far away from this branch to be 'CBZ-encodable'.
1541   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1542 
1543   if (mirror::kUseStringCompression) {
1544     // Check that both strings use the same compression style before using this comparison loop.
1545     __ Eors(temp2, temp2, temp3);
1546     __ Lsrs(temp2, temp2, 1u);
1547     __ B(cs, &different_compression);
1548     // For string compression, calculate the number of bytes to compare (not chars).
1549     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1550     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
1551 
1552     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1553                            2 * kMaxInstructionSizeInBytes,
1554                            CodeBufferCheckScope::kMaximumSize);
1555 
1556     __ it(ne);
1557     __ add(ne, temp0, temp0, temp0);
1558   }
1559 
1560   // Store offset of string value in preparation for comparison loop.
1561   __ Mov(temp1, value_offset);
1562 
1563   // Assertions that must hold in order to compare multiple characters at a time.
1564   CHECK_ALIGNED(value_offset, 8);
1565   static_assert(IsAligned<8>(kObjectAlignment),
1566                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1567 
1568   const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1569   DCHECK_EQ(char_size, 2u);
1570 
1571   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1572 
1573   vixl32::Label find_char_diff_2nd_cmp;
1574   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1575   __ Bind(&loop);
1576   vixl32::Register temp_reg = temps.Acquire();
1577   __ Ldr(temp_reg, MemOperand(str, temp1));
1578   __ Ldr(temp2, MemOperand(arg, temp1));
1579   __ Cmp(temp_reg, temp2);
1580   __ B(ne, &find_char_diff, /* far_target */ false);
1581   __ Add(temp1, temp1, char_size * 2);
1582 
1583   __ Ldr(temp_reg, MemOperand(str, temp1));
1584   __ Ldr(temp2, MemOperand(arg, temp1));
1585   __ Cmp(temp_reg, temp2);
1586   __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
1587   __ Add(temp1, temp1, char_size * 2);
1588   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1589   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1590   __ B(hi, &loop, /* far_target */ false);
1591   __ B(&end);
1592 
1593   __ Bind(&find_char_diff_2nd_cmp);
1594   if (mirror::kUseStringCompression) {
1595     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
1596     __ B(ls, &end, /* far_target */ false);  // Was the second comparison fully beyond the end?
1597   } else {
1598     // Without string compression, we can start treating temp0 as signed
1599     // and rely on the signed comparison below.
1600     __ Sub(temp0, temp0, 2);
1601   }
1602 
1603   // Find the single character difference.
1604   __ Bind(&find_char_diff);
1605   // Get the bit position of the first character that differs.
1606   __ Eor(temp1, temp2, temp_reg);
1607   __ Rbit(temp1, temp1);
1608   __ Clz(temp1, temp1);
1609 
1610   // temp0 = number of characters remaining to compare.
1611   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1612   // in the comparison loop, at a position past the end of the shorter string's data).
1613 
1614   // Without string compression, (temp1 >> 4) = index of the character where the difference
1615   // occurs within the last two words compared, in the interval [0,1]
1616   // (0 for low half-word different, 1 for high half-word different).
1617   // With string compression, (temp1 >> 3) = byte where the difference occurs,
1618   // in the interval [0,3].
1619 
1620   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1621   // the remaining string data, so just return length diff (out).
1622   // The comparison is unsigned for string compression, otherwise signed.
1623   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1624   __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
1625 
1626   // Extract the characters and calculate the difference.
1627   if (mirror::kUseStringCompression) {
1628     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1629     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1630     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1631     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7f800000u
1632     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1633     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
1634     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
1635     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
1636     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
1637     __ And(temp2, temp2, temp3);
1638     __ And(out, out, temp3);
1639   } else {
1640     __ Bic(temp1, temp1, 0xf);
1641     __ Lsr(temp2, temp2, temp1);
1642     __ Lsr(out, temp_reg, temp1);
1643     __ Movt(temp2, 0);
1644     __ Movt(out, 0);
1645   }
1646 
1647   __ Sub(out, out, temp2);
1648   temps.Release(temp_reg);
1649 
1650   if (mirror::kUseStringCompression) {
1651     __ B(&end);
1652     __ Bind(&different_compression);
1653 
1654     // Comparison for different compression style.
1655     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1656     DCHECK_EQ(c_char_size, 1u);
1657 
1658     // We want to free up the temp3, currently holding `str.count`, for comparison.
1659     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1660     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1661     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1662     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1663     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
1664     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1665     __ Mov(temp1, str);
1666     __ Mov(temp2, arg);
1667     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
1668     {
1669       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1670                              3 * kMaxInstructionSizeInBytes,
1671                              CodeBufferCheckScope::kMaximumSize);
1672       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
1673       __ mov(cs, temp1, arg);                 // Preserves flags.
1674       __ mov(cs, temp2, str);                 // Preserves flags.
1675     }
1676     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
1677 
1678     // Adjust temp1 and temp2 from string pointers to data pointers.
1679     __ Add(temp1, temp1, value_offset);
1680     __ Add(temp2, temp2, value_offset);
1681 
1682     vixl32::Label different_compression_loop;
1683     vixl32::Label different_compression_diff;
1684 
1685     // Main loop for different compression.
1686     temp_reg = temps.Acquire();
1687     __ Bind(&different_compression_loop);
1688     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1689     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1690     __ Cmp(temp_reg, temp3);
1691     __ B(ne, &different_compression_diff, /* far_target */ false);
1692     __ Subs(temp0, temp0, 2);
1693     __ B(hi, &different_compression_loop, /* far_target */ false);
1694     __ B(&end);
1695 
1696     // Calculate the difference.
1697     __ Bind(&different_compression_diff);
1698     __ Sub(out, temp_reg, temp3);
1699     temps.Release(temp_reg);
1700     // Flip the difference if the `arg` is compressed.
1701     // `temp0` contains the inverted `str` compression flag, i.e. the same as `arg`'s.
1702     __ Lsrs(temp0, temp0, 1u);
1703     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1704                   "Expecting 0=compressed, 1=uncompressed");
1705 
1706     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1707                            2 * kMaxInstructionSizeInBytes,
1708                            CodeBufferCheckScope::kMaximumSize);
1709     __ it(cc);
1710     __ rsb(cc, out, out, 0);
1711   }
1712 
1713   __ Bind(&end);
1714 
1715   if (can_slow_path) {
1716     __ Bind(slow_path->GetExitLabel());
1717   }
1718 }
1719 
VisitStringEquals(HInvoke * invoke)1720 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1721   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1722                                                             LocationSummary::kNoCall,
1723                                                             kIntrinsified);
1724   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1725   locations->SetInAt(0, Location::RequiresRegister());
1726   locations->SetInAt(1, Location::RequiresRegister());
1727   // Temporary registers to store the lengths of the strings and for calculations.
1728   // Using the CBZ instruction requires a low register, so explicitly set a temp to be R0.
1729   locations->AddTemp(LocationFrom(r0));
1730   locations->AddTemp(Location::RequiresRegister());
1731   locations->AddTemp(Location::RequiresRegister());
1732 
1733   locations->SetOut(Location::RequiresRegister());
1734 }
1735 
VisitStringEquals(HInvoke * invoke)1736 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1737   ArmVIXLAssembler* assembler = GetAssembler();
1738   LocationSummary* locations = invoke->GetLocations();
1739 
1740   vixl32::Register str = InputRegisterAt(invoke, 0);
1741   vixl32::Register arg = InputRegisterAt(invoke, 1);
1742   vixl32::Register out = OutputRegister(invoke);
1743 
1744   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1745   vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1746   vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1747 
1748   vixl32::Label loop;
1749   vixl32::Label end;
1750   vixl32::Label return_true;
1751   vixl32::Label return_false;
1752   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1753 
1754   // Get offsets of count, value, and class fields within a string object.
1755   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1756   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1757   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1758 
1759   // Note that the null check must have been done earlier.
1760   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1761 
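  // Informal sketch of the checks emitted below (Java-level view):
  //
  //   if (arg == null) return false;                       // unless proven non-null
  //   if (str == arg) return true;
  //   if (arg.getClass() != String.class) return false;    // unless proven a String
  //   if (str.count != arg.count) return false;            // length and compression flag
  //   if (str.count == 0) return true;
  //   compare the character data one 32-bit word at a time;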
1762   StringEqualsOptimizations optimizations(invoke);
1763   if (!optimizations.GetArgumentNotNull()) {
1764     // Check if input is null, return false if it is.
1765     __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
1766   }
1767 
1768   // Reference equality check, return true if same reference.
1769   __ Cmp(str, arg);
1770   __ B(eq, &return_true, /* far_target */ false);
1771 
1772   if (!optimizations.GetArgumentIsString()) {
1773     // Instanceof check for the argument by comparing class fields.
1774     // All string objects must have the same type since String cannot be subclassed.
1775     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1776     // If the argument is a string object, its class field must be equal to receiver's class field.
1777     __ Ldr(temp, MemOperand(str, class_offset));
1778     __ Ldr(temp1, MemOperand(arg, class_offset));
1779     __ Cmp(temp, temp1);
1780     __ B(ne, &return_false, /* far_target */ false);
1781   }
1782 
1783   // Load `count` fields of this and argument strings.
1784   __ Ldr(temp, MemOperand(str, count_offset));
1785   __ Ldr(temp1, MemOperand(arg, count_offset));
1786   // Check if the `count` fields are equal; return false if they're not.
1787   // This also compares the compression style; if it differs, return false.
1788   __ Cmp(temp, temp1);
1789   __ B(ne, &return_false, /* far_target */ false);
1790   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1791   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1792                 "Expecting 0=compressed, 1=uncompressed");
1793   __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
1794 
1795   // Assertions that must hold in order to compare strings 4 bytes at a time.
1796   DCHECK_ALIGNED(value_offset, 4);
1797   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1798 
1799   if (mirror::kUseStringCompression) {
1800     // For string compression, calculate the number of bytes to compare (not chars).
1801     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1802     __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1803     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1804                            2 * kMaxInstructionSizeInBytes,
1805                            CodeBufferCheckScope::kMaximumSize);
1806     __ it(cs);                                      // If uncompressed,
1807     __ add(cs, temp, temp, temp);                   //   double the byte count.
1808   }
1809 
1810   // Store offset of string value in preparation for comparison loop.
1811   __ Mov(temp1, value_offset);
1812 
1813   // Loop to compare strings 4 bytes at a time starting at the front of the string.
1814   // Ok to do this because strings are zero-padded to kObjectAlignment.
1815   __ Bind(&loop);
1816   __ Ldr(out, MemOperand(str, temp1));
1817   __ Ldr(temp2, MemOperand(arg, temp1));
1818   __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1819   __ Cmp(out, temp2);
1820   __ B(ne, &return_false, /* far_target */ false);
1821   // With string compression, we have compared 4 bytes, otherwise 2 chars.
1822   __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1823   __ B(hi, &loop, /* far_target */ false);
1824 
1825   // Return true and exit the function.
1826   // If the loop did not branch to return_false, we return true.
1827   __ Bind(&return_true);
1828   __ Mov(out, 1);
1829   __ B(final_label);
1830 
1831   // Return false and exit the function.
1832   __ Bind(&return_false);
1833   __ Mov(out, 0);
1834 
1835   if (end.IsReferenced()) {
1836     __ Bind(&end);
1837   }
1838 }
1839 
GenerateVisitStringIndexOf(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,ArenaAllocator * allocator,bool start_at_zero)1840 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1841                                        ArmVIXLAssembler* assembler,
1842                                        CodeGeneratorARMVIXL* codegen,
1843                                        ArenaAllocator* allocator,
1844                                        bool start_at_zero) {
1845   LocationSummary* locations = invoke->GetLocations();
1846 
1847   // Note that the null check must have been done earlier.
1848   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1849 
1850   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1851   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1852   SlowPathCodeARMVIXL* slow_path = nullptr;
1853   HInstruction* code_point = invoke->InputAt(1);
1854   if (code_point->IsIntConstant()) {
1855     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1856         std::numeric_limits<uint16_t>::max()) {
1857       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1858       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1859       slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1860       codegen->AddSlowPath(slow_path);
1861       __ B(slow_path->GetEntryLabel());
1862       __ Bind(slow_path->GetExitLabel());
1863       return;
1864     }
1865   } else if (code_point->GetType() != Primitive::kPrimChar) {
1866     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1867     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1868     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1869     slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1870     codegen->AddSlowPath(slow_path);
1871     __ B(hs, slow_path->GetEntryLabel());
1872   }
1873 
1874   if (start_at_zero) {
1875     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1876     DCHECK(tmp_reg.Is(r2));
1877     // Start-index = 0.
1878     __ Mov(tmp_reg, 0);
1879   }
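  // When `start_at_zero` (the two-argument String.indexOf(int ch)), the runtime stub
  // still expects a start index in r2, so the location builder reserved r2 as a temp
  // and it is zeroed above; the indexOf(int ch, int fromIndex) variant instead passes
  // the caller-supplied start index in that register (see VisitStringIndexOfAfter).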
1880 
1881   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1882   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1883 
1884   if (slow_path != nullptr) {
1885     __ Bind(slow_path->GetExitLabel());
1886   }
1887 }
1888 
VisitStringIndexOf(HInvoke * invoke)1889 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1890   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1891                                                             LocationSummary::kCallOnMainAndSlowPath,
1892                                                             kIntrinsified);
1893   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1894   // best to align the inputs accordingly.
1895   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1896   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1897   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1898   locations->SetOut(LocationFrom(r0));
1899 
1900   // Need to send start-index=0.
1901   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1902 }
1903 
VisitStringIndexOf(HInvoke * invoke)1904 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1905   GenerateVisitStringIndexOf(
1906       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1907 }
1908 
VisitStringIndexOfAfter(HInvoke * invoke)1909 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1910   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1911                                                             LocationSummary::kCallOnMainAndSlowPath,
1912                                                             kIntrinsified);
1913   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1914   // best to align the inputs accordingly.
1915   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1916   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1917   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1918   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1919   locations->SetOut(LocationFrom(r0));
1920 }
1921 
VisitStringIndexOfAfter(HInvoke * invoke)1922 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1923   GenerateVisitStringIndexOf(
1924       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1925 }
1926 
VisitStringNewStringFromBytes(HInvoke * invoke)1927 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1928   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1929                                                             LocationSummary::kCallOnMainAndSlowPath,
1930                                                             kIntrinsified);
1931   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1932   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1933   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1934   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1935   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1936   locations->SetOut(LocationFrom(r0));
1937 }
1938 
VisitStringNewStringFromBytes(HInvoke * invoke)1939 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1940   ArmVIXLAssembler* assembler = GetAssembler();
1941   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1942   __ Cmp(byte_array, 0);
1943   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1944   codegen_->AddSlowPath(slow_path);
1945   __ B(eq, slow_path->GetEntryLabel());
1946 
1947   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1948   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1949   __ Bind(slow_path->GetExitLabel());
1950 }
1951 
VisitStringNewStringFromChars(HInvoke * invoke)1952 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1953   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1954                                                             LocationSummary::kCallOnMainOnly,
1955                                                             kIntrinsified);
1956   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1957   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1958   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1959   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1960   locations->SetOut(LocationFrom(r0));
1961 }
1962 
VisitStringNewStringFromChars(HInvoke * invoke)1963 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1964   // No need to emit code checking whether `locations->InAt(2)` is a null
1965   // pointer, as callers of the native method
1966   //
1967   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1968   //
1969   // all include a null check on `data` before calling that method.
1970   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1971   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1972 }
1973 
VisitStringNewStringFromString(HInvoke * invoke)1974 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1975   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1976                                                             LocationSummary::kCallOnMainAndSlowPath,
1977                                                             kIntrinsified);
1978   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1979   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1980   locations->SetOut(LocationFrom(r0));
1981 }
1982 
VisitStringNewStringFromString(HInvoke * invoke)1983 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1984   ArmVIXLAssembler* assembler = GetAssembler();
1985   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1986   __ Cmp(string_to_copy, 0);
1987   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1988   codegen_->AddSlowPath(slow_path);
1989   __ B(eq, slow_path->GetEntryLabel());
1990 
1991   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1992   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1993 
1994   __ Bind(slow_path->GetExitLabel());
1995 }
1996 
VisitSystemArrayCopy(HInvoke * invoke)1997 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1998   // The only read barrier implementation supporting the
1999   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2000   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2001     return;
2002   }
2003 
2004   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2005   LocationSummary* locations = invoke->GetLocations();
2006   if (locations == nullptr) {
2007     return;
2008   }
2009 
2010   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2011   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2012   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2013 
2014   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
2015     locations->SetInAt(1, Location::RequiresRegister());
2016   }
2017   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
2018     locations->SetInAt(3, Location::RequiresRegister());
2019   }
2020   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
2021     locations->SetInAt(4, Location::RequiresRegister());
2022   }
2023   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2024     // Temporary register IP cannot be used in
2025     // ReadBarrierSystemArrayCopySlowPathARM (because that register
2026     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
2027     // temporary register from the register allocator.
2028     locations->AddTemp(Location::RequiresRegister());
2029   }
2030 }
2031 
CheckPosition(ArmVIXLAssembler * assembler,Location pos,vixl32::Register input,Location length,SlowPathCodeARMVIXL * slow_path,vixl32::Register temp,bool length_is_input_length=false)2032 static void CheckPosition(ArmVIXLAssembler* assembler,
2033                           Location pos,
2034                           vixl32::Register input,
2035                           Location length,
2036                           SlowPathCodeARMVIXL* slow_path,
2037                           vixl32::Register temp,
2038                           bool length_is_input_length = false) {
2039   // Where is the length in the Array?
2040   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
2041 
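  // Taken together, the branches below enforce, in Java terms, roughly
  //
  //   pos >= 0 && pos <= input.length && (input.length - pos) >= length
  //
  // bailing out to the slow path on any violation; when `length_is_input_length`
  // the only position that can succeed is pos == 0, which is checked directly.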
2042   if (pos.IsConstant()) {
2043     int32_t pos_const = Int32ConstantFrom(pos);
2044     if (pos_const == 0) {
2045       if (!length_is_input_length) {
2046         // Check that length(input) >= length.
2047         __ Ldr(temp, MemOperand(input, length_offset));
2048         if (length.IsConstant()) {
2049           __ Cmp(temp, Int32ConstantFrom(length));
2050         } else {
2051           __ Cmp(temp, RegisterFrom(length));
2052         }
2053         __ B(lt, slow_path->GetEntryLabel());
2054       }
2055     } else {
2056       // Check that length(input) >= pos.
2057       __ Ldr(temp, MemOperand(input, length_offset));
2058       __ Subs(temp, temp, pos_const);
2059       __ B(lt, slow_path->GetEntryLabel());
2060 
2061       // Check that (length(input) - pos) >= length.
2062       if (length.IsConstant()) {
2063         __ Cmp(temp, Int32ConstantFrom(length));
2064       } else {
2065         __ Cmp(temp, RegisterFrom(length));
2066       }
2067       __ B(lt, slow_path->GetEntryLabel());
2068     }
2069   } else if (length_is_input_length) {
2070     // The only way the copy can succeed is if pos is zero.
2071     vixl32::Register pos_reg = RegisterFrom(pos);
2072     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
2073   } else {
2074     // Check that pos >= 0.
2075     vixl32::Register pos_reg = RegisterFrom(pos);
2076     __ Cmp(pos_reg, 0);
2077     __ B(lt, slow_path->GetEntryLabel());
2078 
2079     // Check that pos <= length(input).
2080     __ Ldr(temp, MemOperand(input, length_offset));
2081     __ Subs(temp, temp, pos_reg);
2082     __ B(lt, slow_path->GetEntryLabel());
2083 
2084     // Check that (length(input) - pos) >= length.
2085     if (length.IsConstant()) {
2086       __ Cmp(temp, Int32ConstantFrom(length));
2087     } else {
2088       __ Cmp(temp, RegisterFrom(length));
2089     }
2090     __ B(lt, slow_path->GetEntryLabel());
2091   }
2092 }
2093 
VisitSystemArrayCopy(HInvoke * invoke)2094 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2095   // The only read barrier implementation supporting the
2096   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2097   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2098 
2099   ArmVIXLAssembler* assembler = GetAssembler();
2100   LocationSummary* locations = invoke->GetLocations();
2101 
2102   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2103   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2104   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2105   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2106   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2107 
2108   vixl32::Register src = InputRegisterAt(invoke, 0);
2109   Location src_pos = locations->InAt(1);
2110   vixl32::Register dest = InputRegisterAt(invoke, 2);
2111   Location dest_pos = locations->InAt(3);
2112   Location length = locations->InAt(4);
2113   Location temp1_loc = locations->GetTemp(0);
2114   vixl32::Register temp1 = RegisterFrom(temp1_loc);
2115   Location temp2_loc = locations->GetTemp(1);
2116   vixl32::Register temp2 = RegisterFrom(temp2_loc);
2117   Location temp3_loc = locations->GetTemp(2);
2118   vixl32::Register temp3 = RegisterFrom(temp3_loc);
2119 
2120   SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2121   codegen_->AddSlowPath(intrinsic_slow_path);
2122 
2123   vixl32::Label conditions_on_positions_validated;
2124   SystemArrayCopyOptimizations optimizations(invoke);
2125 
2126   // If source and destination are the same, we go to the slow path when the copy
2127   // regions overlap in a way that would require a backward copy (i.e. src_pos < dest_pos).
2128   if (src_pos.IsConstant()) {
2129     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2130     if (dest_pos.IsConstant()) {
2131       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2132       if (optimizations.GetDestinationIsSource()) {
2133         // Checked when building locations.
2134         DCHECK_GE(src_pos_constant, dest_pos_constant);
2135       } else if (src_pos_constant < dest_pos_constant) {
2136         __ Cmp(src, dest);
2137         __ B(eq, intrinsic_slow_path->GetEntryLabel());
2138       }
2139 
2140       // Checked when building locations.
2141       DCHECK(!optimizations.GetDestinationIsSource()
2142              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2143     } else {
2144       if (!optimizations.GetDestinationIsSource()) {
2145         __ Cmp(src, dest);
2146         __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2147       }
2148       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2149       __ B(gt, intrinsic_slow_path->GetEntryLabel());
2150     }
2151   } else {
2152     if (!optimizations.GetDestinationIsSource()) {
2153       __ Cmp(src, dest);
2154       __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2155     }
2156     if (dest_pos.IsConstant()) {
2157       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2158       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2159     } else {
2160       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2161     }
2162     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2163   }
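  // Example of the overlapping case that bails out (a sketch): arraycopy(a, 0, a, 1, n)
  // shifts elements up, so the forward copy used on the fast path would read slots it
  // has already overwritten; the runtime call handles that memmove-like case instead.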
2164 
2165   __ Bind(&conditions_on_positions_validated);
2166 
2167   if (!optimizations.GetSourceIsNotNull()) {
2168     // Bail out if the source is null.
2169     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
2170   }
2171 
2172   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2173     // Bail out if the destination is null.
2174     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
2175   }
2176 
2177   // If the length is negative, bail out.
2178   // We have already checked in the LocationsBuilder for the constant case.
2179   if (!length.IsConstant() &&
2180       !optimizations.GetCountIsSourceLength() &&
2181       !optimizations.GetCountIsDestinationLength()) {
2182     __ Cmp(RegisterFrom(length), 0);
2183     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2184   }
2185 
2186   // Validity checks: source.
2187   CheckPosition(assembler,
2188                 src_pos,
2189                 src,
2190                 length,
2191                 intrinsic_slow_path,
2192                 temp1,
2193                 optimizations.GetCountIsSourceLength());
2194 
2195   // Validity checks: dest.
2196   CheckPosition(assembler,
2197                 dest_pos,
2198                 dest,
2199                 length,
2200                 intrinsic_slow_path,
2201                 temp1,
2202                 optimizations.GetCountIsDestinationLength());
2203 
2204   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2205     // Check whether all elements of the source array are assignable to the component
2206     // type of the destination array. We do two checks: the classes are the same,
2207     // or the destination is Object[]. If none of these checks succeed, we go to the
2208     // slow path.
2209 
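    // Informally (a sketch): with S = src.getClass() and D = dest.getClass(), the fast
    // path is taken when S == D, or when D's component type is java.lang.Object
    // (detected below by its component type having a null super class); anything else
    // is deferred to the runtime's full assignability check via the slow path.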
2210     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2211       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2212         // /* HeapReference<Class> */ temp1 = src->klass_
2213         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2214             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2215         // Bail out if the source is not a non primitive array.
2216         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2217         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2218             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2219         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2220         // If heap poisoning is enabled, `temp1` has been unpoisoned
2221         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2222         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2223         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2224         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2225         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2226       }
2227 
2228       // /* HeapReference<Class> */ temp1 = dest->klass_
2229       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2230           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2231 
2232       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2233         // Bail out if the destination is not a non primitive array.
2234         //
2235         // Register `temp1` is not trashed by the read barrier emitted
2236         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2237         // method produces a call to a ReadBarrierMarkRegX entry point,
2238         // which saves all potentially live registers, including
2239         // temporaries such as `temp1`.
2240         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2241         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2242             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
2243         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2244         // If heap poisoning is enabled, `temp2` has been unpoisoned
2245         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2246         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2247         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2248         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2249         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2250       }
2251 
2252       // For the same reason given earlier, `temp1` is not trashed by the
2253       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2254       // /* HeapReference<Class> */ temp2 = src->klass_
2255       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2256           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2257       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2258       __ Cmp(temp1, temp2);
2259 
2260       if (optimizations.GetDestinationIsTypedObjectArray()) {
2261         vixl32::Label do_copy;
2262         __ B(eq, &do_copy, /* far_target */ false);
2263         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2264         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2265             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2266         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2267         // We do not need to emit a read barrier for the following
2268         // heap reference load, as `temp1` is only used in a
2269         // comparison with null below, and this reference is not
2270         // kept afterwards.
2271         __ Ldr(temp1, MemOperand(temp1, super_offset));
2272         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2273         __ Bind(&do_copy);
2274       } else {
2275         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2276       }
2277     } else {
2278       // Non read barrier code.
2279 
2280       // /* HeapReference<Class> */ temp1 = dest->klass_
2281       __ Ldr(temp1, MemOperand(dest, class_offset));
2282       // /* HeapReference<Class> */ temp2 = src->klass_
2283       __ Ldr(temp2, MemOperand(src, class_offset));
2284       bool did_unpoison = false;
2285       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2286           !optimizations.GetSourceIsNonPrimitiveArray()) {
2287         // One or two of the references need to be unpoisoned. Unpoison them
2288         // both to make the identity check valid.
2289         assembler->MaybeUnpoisonHeapReference(temp1);
2290         assembler->MaybeUnpoisonHeapReference(temp2);
2291         did_unpoison = true;
2292       }
2293 
2294       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2295         // Bail out if the destination is not a non-primitive array.
2296         // /* HeapReference<Class> */ temp3 = temp1->component_type_
2297         __ Ldr(temp3, MemOperand(temp1, component_offset));
2298         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2299         assembler->MaybeUnpoisonHeapReference(temp3);
2300         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2301         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2302         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2303         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2304       }
2305 
2306       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2307         // Bail out if the source is not a non-primitive array.
2308         // /* HeapReference<Class> */ temp3 = temp2->component_type_
2309         __ Ldr(temp3, MemOperand(temp2, component_offset));
2310         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2311         assembler->MaybeUnpoisonHeapReference(temp3);
2312         // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2313         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2314         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2315         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2316       }
2317 
2318       __ Cmp(temp1, temp2);
2319 
2320       if (optimizations.GetDestinationIsTypedObjectArray()) {
2321         vixl32::Label do_copy;
2322         __ B(eq, &do_copy, /* far_target */ false);
2323         if (!did_unpoison) {
2324           assembler->MaybeUnpoisonHeapReference(temp1);
2325         }
2326         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2327         __ Ldr(temp1, MemOperand(temp1, component_offset));
2328         assembler->MaybeUnpoisonHeapReference(temp1);
2329         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2330         __ Ldr(temp1, MemOperand(temp1, super_offset));
2331         // No need to unpoison the result, as we are comparing against null.
2332         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2333         __ Bind(&do_copy);
2334       } else {
2335         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2336       }
2337     }
2338   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2339     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2340     // Bail out if the source is not a non-primitive array.
2341     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2342       // /* HeapReference<Class> */ temp1 = src->klass_
2343       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2344           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2345       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2346       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2347           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2348       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2349       // If heap poisoning is enabled, `temp3` has been unpoisoned
2350       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2351     } else {
2352       // /* HeapReference<Class> */ temp1 = src->klass_
2353       __ Ldr(temp1, MemOperand(src, class_offset));
2354       assembler->MaybeUnpoisonHeapReference(temp1);
2355       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2356       __ Ldr(temp3, MemOperand(temp1, component_offset));
2357       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2358       assembler->MaybeUnpoisonHeapReference(temp3);
2359     }
2360     // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2361     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2362     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2363     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2364   }
2365 
2366   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2367     // Constant zero length: no need to emit the loop code at all.
2368   } else {
2369     vixl32::Label done;
2370     const Primitive::Type type = Primitive::kPrimNot;
2371     const int32_t element_size = Primitive::ComponentSize(type);
2372 
2373     if (length.IsRegister()) {
2374       // Don't enter the copy loop if the length is zero.
2375       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
2376     }
2377 
2378     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2379       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2380 
2381       // SystemArrayCopy implementation for Baker read barriers (see
2382       // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2383       //
2384       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2385       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2386       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2387       //   if (is_gray) {
2388       //     // Slow-path copy.
2389       //     do {
2390       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2391       //     } while (src_ptr != end_ptr)
2392       //   } else {
2393       //     // Fast-path copy.
2394       //     do {
2395       //       *dest_ptr++ = *src_ptr++;
2396       //     } while (src_ptr != end_ptr)
2397       //   }
2398 
2399       // /* int32_t */ monitor = src->monitor_
2400       __ Ldr(temp2, MemOperand(src, monitor_offset));
2401       // /* LockWord */ lock_word = LockWord(monitor)
2402       static_assert(sizeof(LockWord) == sizeof(int32_t),
2403                     "art::LockWord and int32_t have different sizes.");
2404 
2405       // Introduce a dependency on the lock_word including the rb_state,
2406       // which shall prevent load-load reordering without using
2407       // a memory barrier (which would be more expensive).
2408       // `src` is unchanged by this operation, but its value now depends
2409       // on `temp2`.
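      // (In AArch32, an operand logically shifted right by #32 reads as zero, so the ADD below
      // leaves the numeric value of `src` unchanged; its only effect is the register dependency
      // on `temp2`.)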
2410       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2411 
2412       // Compute the base source address in `temp1`.
2413       // Note that `temp1` (the base source address) is computed from
2414       // `src` (and `src_pos`) here, and thus honors the artificial
2415       // dependency of `src` on `temp2`.
2416       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2417       // Compute the end source address in `temp3`.
2418       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2419       // The base destination address is computed later, as `temp2` is
2420       // used for intermediate computations.
2421 
2422       // Slow path used to copy array when `src` is gray.
2423       // Note that the base destination address is computed in `temp2`
2424       // by the slow path code.
2425       SlowPathCodeARMVIXL* read_barrier_slow_path =
2426           new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2427       codegen_->AddSlowPath(read_barrier_slow_path);
2428 
2429       // Given the numeric representation, it's enough to check the low bit of the
2430       // rb_state. We do that by shifting the bit out of the lock word with LSRS,
2431       // which can be a 16-bit instruction, unlike the TST immediate.
2432       static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2433       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2434       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2435       // Carry flag is the last bit shifted out by LSRS.
2436       __ B(cs, read_barrier_slow_path->GetEntryLabel());
2437 
2438       // Fast-path copy.
2439       // Compute the base destination address in `temp2`.
2440       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2441       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2442       // poison/unpoison.
2443       vixl32::Label loop;
2444       __ Bind(&loop);
2445       {
2446         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2447         const vixl32::Register temp_reg = temps.Acquire();
2448         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2449         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2450       }
2451       __ Cmp(temp1, temp3);
2452       __ B(ne, &loop, /* far_target */ false);
2453 
2454       __ Bind(read_barrier_slow_path->GetExitLabel());
2455     } else {
2456       // Non read barrier code.
2457       // Compute the base source address in `temp1`.
2458       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2459       // Compute the base destination address in `temp2`.
2460       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2461       // Compute the end source address in `temp3`.
2462       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2463       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2464       // poison/unpoison.
2465       vixl32::Label loop;
2466       __ Bind(&loop);
2467       {
2468         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2469         const vixl32::Register temp_reg = temps.Acquire();
2470         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2471         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2472       }
2473       __ Cmp(temp1, temp3);
2474       __ B(ne, &loop, /* far_target */ false);
2475     }
2476     __ Bind(&done);
2477   }
2478 
2479   // We only need one card marking on the destination array.
2480   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2481 
2482   __ Bind(intrinsic_slow_path->GetExitLabel());
2483 }
2484 
2485 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2486   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2487   // the code generator. Furthermore, the register allocator creates fixed live intervals
2488   // for all caller-saved registers because we are doing a function call. As a result, if
2489   // the input and output locations are unallocated, the register allocator runs out of
2490   // registers and fails; however, a debuggable graph is not the common case.
2491   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2492     return;
2493   }
2494 
2495   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2496   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2497   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2498 
2499   LocationSummary* const locations = new (arena) LocationSummary(invoke,
2500                                                                  LocationSummary::kCallOnMainOnly,
2501                                                                  kIntrinsified);
2502   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2503 
2504   locations->SetInAt(0, Location::RequiresFpuRegister());
2505   locations->SetOut(Location::RequiresFpuRegister());
2506   // Native code uses the soft float ABI.
2507   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2508   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2509 }
2510 
2511 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2512   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2513   // the code generator. Furthermore, the register allocator creates fixed live intervals
2514   // for all caller-saved registers because we are doing a function call. As a result, if
2515   // the input and output locations are unallocated, the register allocator runs out of
2516   // registers and fails; however, a debuggable graph is not the common case.
2517   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2518     return;
2519   }
2520 
2521   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2522   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2523   DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2524   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2525 
2526   LocationSummary* const locations = new (arena) LocationSummary(invoke,
2527                                                                  LocationSummary::kCallOnMainOnly,
2528                                                                  kIntrinsified);
2529   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2530 
2531   locations->SetInAt(0, Location::RequiresFpuRegister());
2532   locations->SetInAt(1, Location::RequiresFpuRegister());
2533   locations->SetOut(Location::RequiresFpuRegister());
2534   // Native code uses the soft float ABI.
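  // Under the soft-float ABI each double argument occupies a core register pair, hence the four
  // core-register temps reserved below.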
2535   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2536   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2537   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2538   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2539 }
2540 
2541 static void GenFPToFPCall(HInvoke* invoke,
2542                           ArmVIXLAssembler* assembler,
2543                           CodeGeneratorARMVIXL* codegen,
2544                           QuickEntrypointEnum entry) {
2545   LocationSummary* const locations = invoke->GetLocations();
2546 
2547   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2548   DCHECK(locations->WillCall() && locations->Intrinsified());
2549 
2550   // Native code uses the soft float ABI.
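  // The double argument is split into the two core-register temps for the call, and the double
  // result is reassembled from the same register pair afterwards.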
2551   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2552           RegisterFrom(locations->GetTemp(1)),
2553           InputDRegisterAt(invoke, 0));
2554   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2555   __ Vmov(OutputDRegister(invoke),
2556           RegisterFrom(locations->GetTemp(0)),
2557           RegisterFrom(locations->GetTemp(1)));
2558 }
2559 
2560 static void GenFPFPToFPCall(HInvoke* invoke,
2561                             ArmVIXLAssembler* assembler,
2562                             CodeGeneratorARMVIXL* codegen,
2563                             QuickEntrypointEnum entry) {
2564   LocationSummary* const locations = invoke->GetLocations();
2565 
2566   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2567   DCHECK(locations->WillCall() && locations->Intrinsified());
2568 
2569   // Native code uses the soft float ABI.
2570   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2571           RegisterFrom(locations->GetTemp(1)),
2572           InputDRegisterAt(invoke, 0));
2573   __ Vmov(RegisterFrom(locations->GetTemp(2)),
2574           RegisterFrom(locations->GetTemp(3)),
2575           InputDRegisterAt(invoke, 1));
2576   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2577   __ Vmov(OutputDRegister(invoke),
2578           RegisterFrom(locations->GetTemp(0)),
2579           RegisterFrom(locations->GetTemp(1)));
2580 }
2581 
2582 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2583   CreateFPToFPCallLocations(arena_, invoke);
2584 }
2585 
2586 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2587   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2588 }
2589 
2590 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2591   CreateFPToFPCallLocations(arena_, invoke);
2592 }
2593 
2594 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2595   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2596 }
2597 
2598 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2599   CreateFPToFPCallLocations(arena_, invoke);
2600 }
2601 
2602 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2603   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2604 }
2605 
2606 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2607   CreateFPToFPCallLocations(arena_, invoke);
2608 }
2609 
2610 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2611   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2612 }
2613 
2614 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2615   CreateFPToFPCallLocations(arena_, invoke);
2616 }
2617 
2618 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2619   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2620 }
2621 
2622 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2623   CreateFPToFPCallLocations(arena_, invoke);
2624 }
2625 
2626 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2627   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2628 }
2629 
2630 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2631   CreateFPToFPCallLocations(arena_, invoke);
2632 }
2633 
2634 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2635   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2636 }
2637 
2638 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2639   CreateFPToFPCallLocations(arena_, invoke);
2640 }
2641 
2642 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2643   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2644 }
2645 
2646 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2647   CreateFPToFPCallLocations(arena_, invoke);
2648 }
2649 
2650 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2651   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2652 }
2653 
2654 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2655   CreateFPToFPCallLocations(arena_, invoke);
2656 }
2657 
2658 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2659   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2660 }
2661 
2662 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2663   CreateFPToFPCallLocations(arena_, invoke);
2664 }
2665 
2666 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2667   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2668 }
2669 
2670 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2671   CreateFPToFPCallLocations(arena_, invoke);
2672 }
2673 
2674 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2675   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2676 }
2677 
2678 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2679   CreateFPToFPCallLocations(arena_, invoke);
2680 }
2681 
2682 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2683   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2684 }
2685 
2686 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2687   CreateFPToFPCallLocations(arena_, invoke);
2688 }
2689 
2690 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2691   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2692 }
2693 
2694 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2695   CreateFPFPToFPCallLocations(arena_, invoke);
2696 }
2697 
2698 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2699   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2700 }
2701 
2702 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2703   CreateFPFPToFPCallLocations(arena_, invoke);
2704 }
2705 
2706 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2707   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2708 }
2709 
2710 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2711   CreateFPFPToFPCallLocations(arena_, invoke);
2712 }
2713 
2714 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2715   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2716 }
2717 
2718 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2719   CreateIntToIntLocations(arena_, invoke);
2720 }
2721 
2722 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2723   ArmVIXLAssembler* assembler = GetAssembler();
2724   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2725 }
2726 
2727 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2728   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2729                                                             LocationSummary::kNoCall,
2730                                                             kIntrinsified);
2731   locations->SetInAt(0, Location::RequiresRegister());
2732   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2733 }
2734 
2735 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2736   ArmVIXLAssembler* assembler = GetAssembler();
2737   LocationSummary* locations = invoke->GetLocations();
2738 
2739   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2740   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2741   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2742   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2743 
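  // Reverse the full 64-bit value by bit-reversing each 32-bit half and swapping the halves.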
2744   __ Rbit(out_reg_lo, in_reg_hi);
2745   __ Rbit(out_reg_hi, in_reg_lo);
2746 }
2747 
2748 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2749   CreateIntToIntLocations(arena_, invoke);
2750 }
2751 
2752 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2753   ArmVIXLAssembler* assembler = GetAssembler();
2754   __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2755 }
2756 
2757 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2758   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2759                                                             LocationSummary::kNoCall,
2760                                                             kIntrinsified);
2761   locations->SetInAt(0, Location::RequiresRegister());
2762   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2763 }
2764 
2765 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2766   ArmVIXLAssembler* assembler = GetAssembler();
2767   LocationSummary* locations = invoke->GetLocations();
2768 
2769   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2770   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2771   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2772   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2773 
2774   __ Rev(out_reg_lo, in_reg_hi);
2775   __ Rev(out_reg_hi, in_reg_lo);
2776 }
2777 
2778 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2779   CreateIntToIntLocations(arena_, invoke);
2780 }
2781 
2782 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2783   ArmVIXLAssembler* assembler = GetAssembler();
2784   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2785 }
2786 
2787 static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2788   DCHECK(Primitive::IsIntOrLongType(type)) << type;
2789   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2790   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2791 
2792   bool is_long = type == Primitive::kPrimLong;
2793   LocationSummary* locations = instr->GetLocations();
2794   Location in = locations->InAt(0);
2795   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2796   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2797   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2798   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2799   vixl32::Register  out_r = OutputRegister(instr);
2800 
2801   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2802   // According to the Cortex-A57 and Cortex-A72 optimization guides, compared to transferring to a full D-reg,
2803   // transferring data from a core reg to the upper or lower half of a VFP D-reg incurs extra latency.
2804   // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
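  // For the int case, both halves of the D-reg hold the same value, so after the VCNT/VPADDL
  // chain each 32-bit lane contains the same count and reading the low S-reg yields the result;
  // the long case needs the extra VPADDL to fold both lanes into a single 64-bit count.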
2805   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2806   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2807   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2808   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2809   if (is_long) {
2810     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2811   }
2812   __ Vmov(out_r, tmp_s);
2813 }
2814 
2815 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2816   CreateIntToIntLocations(arena_, invoke);
2817   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2818 }
2819 
2820 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2821   GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2822 }
2823 
2824 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2825   VisitIntegerBitCount(invoke);
2826 }
2827 
2828 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2829   GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2830 }
2831 
2832 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2833   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2834                                                             LocationSummary::kNoCall,
2835                                                             kIntrinsified);
2836   locations->SetInAt(0, Location::RequiresRegister());
2837   locations->SetInAt(1, Location::RequiresRegister());
2838   locations->SetInAt(2, Location::RequiresRegister());
2839   locations->SetInAt(3, Location::RequiresRegister());
2840   locations->SetInAt(4, Location::RequiresRegister());
2841 
2842   // Temporary registers to store lengths of strings and for calculations.
2843   locations->AddTemp(Location::RequiresRegister());
2844   locations->AddTemp(Location::RequiresRegister());
2845   locations->AddTemp(Location::RequiresRegister());
2846 }
2847 
2848 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2849   ArmVIXLAssembler* assembler = GetAssembler();
2850   LocationSummary* locations = invoke->GetLocations();
2851 
2852   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2853   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2854   DCHECK_EQ(char_size, 2u);
2855 
2856   // Location of data in char array buffer.
2857   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2858 
2859   // Location of char array data in string.
2860   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2861 
2862   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2863   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2864   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2865   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2866   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2867   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2868   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2869 
2870   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2871   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2872   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2873 
2874   vixl32::Label done, compressed_string_loop;
2875   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2876   // Compute the base address of the dst region to be written.
2877   __ Add(dst_ptr, dstObj, data_offset);
2878   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2879 
2880   __ Subs(num_chr, srcEnd, srcBegin);
2881   // Early out for valid zero-length retrievals.
2882   __ B(eq, final_label, /* far_target */ false);
2883 
2884   // src range to copy.
2885   __ Add(src_ptr, srcObj, value_offset);
2886 
2887   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2888   vixl32::Register temp;
2889   vixl32::Label compressed_string_preloop;
2890   if (mirror::kUseStringCompression) {
2891     // Location of count in string.
2892     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2893     temp = temps.Acquire();
2894     // String's length.
2895     __ Ldr(temp, MemOperand(srcObj, count_offset));
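    // Bit 0 of the count field is the compression flag: if it is clear, the string is compressed
    // (8-bit characters) and we take the expansion loop below; otherwise we fall through to the
    // 16-bit copy.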
2896     __ Tst(temp, 1);
2897     temps.Release(temp);
2898     __ B(eq, &compressed_string_preloop, /* far_target */ false);
2899   }
2900   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2901 
2902   // Do the copy.
2903   vixl32::Label loop, remainder;
2904 
2905   temp = temps.Acquire();
2906   // Use a temp here to avoid having to repair the value of num_chr on the < 4 character path.
2907   __ Subs(temp, num_chr, 4);
2908   __ B(lt, &remainder, /* far_target */ false);
2909 
2910   // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
2911   __ Mov(num_chr, temp);
2912 
2913   // The main loop, used for longer fetches, loads and stores 4 x 16-bit characters at a time.
2914   // (LDRD/STRD fault on unaligned addresses, and it's not worth inlining extra code
2915   // to rectify this everywhere this intrinsic applies.)
2916   __ Bind(&loop);
2917   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2918   __ Subs(num_chr, num_chr, 4);
2919   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2920   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2921   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2922   temps.Release(temp);
2923   __ B(ge, &loop, /* far_target */ false);
2924 
2925   __ Adds(num_chr, num_chr, 4);
2926   __ B(eq, final_label, /* far_target */ false);
2927 
2928   // Main loop for < 4 character case and remainder handling. Loads and stores one
2929   // 16-bit Java character at a time.
2930   __ Bind(&remainder);
2931   temp = temps.Acquire();
2932   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2933   __ Subs(num_chr, num_chr, 1);
2934   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2935   temps.Release(temp);
2936   __ B(gt, &remainder, /* far_target */ false);
2937 
2938   if (mirror::kUseStringCompression) {
2939     __ B(final_label);
2940 
2941     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2942     DCHECK_EQ(c_char_size, 1u);
2943     // Copy loop for compressed src, expanding one 8-bit character to a 16-bit character at a time.
2944     __ Bind(&compressed_string_preloop);
2945     __ Add(src_ptr, src_ptr, srcBegin);
2946     __ Bind(&compressed_string_loop);
2947     temp = temps.Acquire();
2948     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2949     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2950     temps.Release(temp);
2951     __ Subs(num_chr, num_chr, 1);
2952     __ B(gt, &compressed_string_loop, /* far_target */ false);
2953   }
2954 
2955   if (done.IsReferenced()) {
2956     __ Bind(&done);
2957   }
2958 }
2959 
2960 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2961   CreateFPToIntLocations(arena_, invoke);
2962 }
2963 
2964 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2965   ArmVIXLAssembler* const assembler = GetAssembler();
2966   const vixl32::Register out = OutputRegister(invoke);
2967   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2968   // we don't care about the sign bit anyway.
2969   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
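  // For example, +Inf (0x7f800000) shifted left by one is 0xff000000, which the EOR below maps
  // to 0; CLZ then yields 32 and the final LSR #5 produces 1. Any other input leaves at least
  // one bit set, so CLZ returns less than 32 and the result is 0.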
2970 
2971   __ Vmov(out, InputSRegisterAt(invoke, 0));
2972   // We don't care about the sign bit, so shift left.
2973   __ Lsl(out, out, 1);
2974   __ Eor(out, out, infinity);
2975   // If the result is 0, then it has 32 leading zeros, and fewer than that otherwise.
2976   __ Clz(out, out);
2977   // Any number less than 32 logically shifted right by 5 bits results in 0;
2978   // the same operation on 32 yields 1.
2979   __ Lsr(out, out, 5);
2980 }
2981 
2982 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2983   CreateFPToIntLocations(arena_, invoke);
2984 }
2985 
2986 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2987   ArmVIXLAssembler* const assembler = GetAssembler();
2988   const vixl32::Register out = OutputRegister(invoke);
2989   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2990   const vixl32::Register temp = temps.Acquire();
2991   // The highest 32 bits of double precision positive infinity separated into
2992   // two constants encodable as immediate operands.
2993   constexpr uint32_t infinity_high  = 0x7f000000U;
2994   constexpr uint32_t infinity_high2 = 0x00f00000U;
2995 
2996   static_assert((infinity_high | infinity_high2) ==
2997                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2998                 "The constants do not add up to the high 32 bits of double "
2999                 "precision positive infinity.");
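  // For +/-Inf the low word is zero and the high word XORs with the two constants to either 0 or
  // 0x80000000; the LSL #1 in the ORR below drops that sign bit, so the combined result is zero
  // exactly for the infinities, which the CLZ/LSR pair then turns into 1.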
3000   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
3001   __ Eor(out, out, infinity_high);
3002   __ Eor(out, out, infinity_high2);
3003   // We don't care about the sign bit, so shift left.
3004   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
3005   // If the result is 0, then it has 32 leading zeros, and fewer than that otherwise.
3006   __ Clz(out, out);
3007   // Any number less than 32 logically shifted right by 5 bits results in 0;
3008   // the same operation on 32 yields 1.
3009   __ Lsr(out, out, 5);
3010 }
3011 
3012 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
3013   if (kEmitCompilerReadBarrier) {
3014     // Do not intrinsify this call with the read barrier configuration.
3015     return;
3016   }
3017   LocationSummary* locations = new (arena_) LocationSummary(invoke,
3018                                                             LocationSummary::kCallOnSlowPath,
3019                                                             kIntrinsified);
3020   locations->SetInAt(0, Location::RequiresRegister());
3021   locations->SetOut(Location::SameAsFirstInput());
3022   locations->AddTemp(Location::RequiresRegister());
3023 }
3024 
3025 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
3026   DCHECK(!kEmitCompilerReadBarrier);
3027   ArmVIXLAssembler* assembler = GetAssembler();
3028   LocationSummary* locations = invoke->GetLocations();
3029 
3030   vixl32::Register obj = InputRegisterAt(invoke, 0);
3031   vixl32::Register out = OutputRegister(invoke);
3032 
3033   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
3034   codegen_->AddSlowPath(slow_path);
3035 
3036   // Load ArtMethod first.
3037   HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
3038   DCHECK(invoke_direct != nullptr);
3039   vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
3040       invoke_direct, locations->GetTemp(0)));
3041 
3042   // Now get declaring class.
3043   __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
3044 
3045   uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
3046   uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
3047   DCHECK_NE(slow_path_flag_offset, 0u);
3048   DCHECK_NE(disable_flag_offset, 0u);
3049   DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
3050 
3051   // Check the static flags that prevent using the intrinsic.
3052   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3053   vixl32::Register temp1 = temps.Acquire();
3054   __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
3055   __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
3056   __ Orr(temp0, temp1, temp0);
3057   __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
3058 
3059   // Fast path.
3060   __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
3061   codegen_->MaybeRecordImplicitNullCheck(invoke);
3062   assembler->MaybeUnpoisonHeapReference(out);
3063   __ Bind(slow_path->GetExitLabel());
3064 }
3065 
3066 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
3067   if (features_.HasARMv8AInstructions()) {
3068     CreateFPToFPLocations(arena_, invoke);
3069   }
3070 }
3071 
3072 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
3073   ArmVIXLAssembler* assembler = GetAssembler();
3074   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
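  // VRINTP rounds towards positive infinity, matching the semantics of Math.ceil.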
3075   __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3076 }
3077 
3078 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
3079   if (features_.HasARMv8AInstructions()) {
3080     CreateFPToFPLocations(arena_, invoke);
3081   }
3082 }
3083 
3084 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
3085   ArmVIXLAssembler* assembler = GetAssembler();
3086   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
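  // VRINTM rounds towards negative infinity, matching the semantics of Math.floor.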
3087   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3088 }
3089 
3090 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3091   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3092   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3093       invoke,
3094       codegen_,
3095       LocationFrom(r0),
3096       LocationFrom(calling_convention.GetRegisterAt(0)));
3097 }
3098 
3099 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3100   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3101   LocationSummary* locations = invoke->GetLocations();
3102   ArmVIXLAssembler* const assembler = GetAssembler();
3103 
3104   vixl32::Register out = RegisterFrom(locations->Out());
3105   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3106   vixl32::Register temp = temps.Acquire();
3107   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3108   vixl32::Register argument = calling_convention.GetRegisterAt(0);
3109   if (invoke->InputAt(0)->IsConstant()) {
3110     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3111     if (value >= info.low && value <= info.high) {
3112       // Just embed the j.l.Integer in the code.
3113       ScopedObjectAccess soa(Thread::Current());
3114       mirror::Object* boxed = info.cache->Get(value + (-info.low));
3115       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3116       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3117       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
3118     } else {
3119       // Allocate and initialize a new j.l.Integer.
3120       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3121       // JIT object table.
3122       uint32_t address =
3123           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3124       __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3125       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3126       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3127       __ Mov(temp, value);
3128       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3129       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3130       // one.
3131       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3132     }
3133   } else {
3134     vixl32::Register in = RegisterFrom(locations->InAt(0));
3135     // Check bounds of our cache.
3136     __ Add(out, in, -info.low);
3137     __ Cmp(out, info.high - info.low + 1);
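    // The unsigned comparison branch below catches both out-of-range sides at once: values below
    // info.low wrap around to large unsigned numbers after the subtraction above.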
3138     vixl32::Label allocate, done;
3139     __ B(hs, &allocate);
3140     // If the value is within the bounds, load the j.l.Integer directly from the array.
3141     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3142     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3143     __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3144     codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
3145     assembler->MaybeUnpoisonHeapReference(out);
3146     __ B(&done);
3147     __ Bind(&allocate);
3148     // Otherwise allocate and initialize a new j.l.Integer.
3149     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3150     __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3151     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3152     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3153     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3154     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3155     // one.
3156     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3157     __ Bind(&done);
3158   }
3159 }
3160 
3161 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
3162 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
3163 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3164 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
3165 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
3166 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
3167 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
3168 
3169 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3170 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3171 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3172 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3173 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3174 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
3175 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3176 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3177 
3178 // 1.8.
3179 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3180 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3181 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3182 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3183 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3184 
3185 UNREACHABLE_INTRINSICS(ARMVIXL)
3186 
3187 #undef __
3188 
3189 }  // namespace arm
3190 }  // namespace art
3191