/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}
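
// Both branches above compute `base = array + data_offset + pos * element_size`;
// for a reference array (element_size == 4) with pos == 3, for example, that is
// array + data_offset + 12, whether `pos` is a constant or held in a register.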

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}
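
// Similarly, the end address is `end = base + copy_length * element_size`; together with
// the base address this bounds the copy loop emitted for the SystemArrayCopy intrinsic.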

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};
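
// In summary, the slow path above re-walks the copy region: it loads each source
// reference with a post-indexed LDR, marks it through the ReadBarrierMarkRegX entry
// point selected by the register number of `tmp`, and stores the marked reference to
// the destination until `src_curr_addr` reaches `src_stop_addr`.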

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}
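
// These two helpers implement the raw bit-casts (Double.doubleToRawLongBits,
// Float.floatToRawIntBits and their inverses) as plain VMOV transfers between core and
// floating-point registers, so no conversion or rounding is involved.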

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}
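
// For the 64-bit case above: if the high word is non-zero the result is CLZ(hi),
// otherwise it is 32 + CLZ(lo). For example, Long.numberOfLeadingZeros(1L) takes the
// second path and yields 32 + 31 = 63.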

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}
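
// Trailing zeros are counted by bit-reversing the word (RBIT) and then counting leading
// zeros (CLZ). In the 64-bit case the low word is handled first and, when it is zero,
// 32 is added to the count taken from the high word; e.g. the value 0x100000000L
// yields 32 + 0 = 32.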

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* is_far_target= */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}
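
// Worked example for the negative-tie fix-up above: for an input of -2.5f, VCVTA
// (round to nearest, ties away from zero) produces -3, but Math.round rounds half up,
// so the expected result is -2. The check computes VRINTA(-2.5f) = -3.0f and
// -2.5f - (-3.0f) = 0.5f, detects the tie, and the conditional ADD bumps the result to -2.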

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}
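
// The 64-bit peek above is split into two 32-bit loads because the address may be
// unaligned; the loads are ordered so that when the address register aliases the low
// half of the output, the high half is loaded first and the address is not clobbered
// before the second load.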

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          // Piggy-back on the field load path using introspection for the Baker read barrier.
          __ Add(RegisterFrom(temp), base, Operand(offset));
          MemOperand src(RegisterFrom(temp), 0);
          codegen->GenerateFieldLoadWithBakerReadBarrier(
              invoke, trg_loc, base, src, /* needs_null_check= */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}
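
// For the volatile variants above, the plain load is followed by a DMB ISH, giving the
// load-acquire ordering required by Unsafe volatile semantics; 64-bit volatile loads on
// cores without atomic LDRD/STRD use LDREXD to read the pair atomically.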

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kInt64) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* is_far_target= */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kReference) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}
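
// Barrier placement above: a DMB ISH is emitted before the store for both ordered and
// volatile puts, and a second DMB ISH after the store for volatile puts only. Reference
// puts additionally poison the value if heap poisoning is enabled and mark the GC card
// for the holding object.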

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}
class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke) {}

  const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    __ Bind(GetEntryLabel());

    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register base = InputRegisterAt(instruction_, 1);           // Object pointer.
    vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
    vixl32::Register expected = InputRegisterAt(instruction_, 3);       // Expected.
    vixl32::Register value = InputRegisterAt(instruction_, 4);          // Value.

    vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.

    // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
    // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the
    // process due to lack of other temps suitable for the read barrier.
    arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
    __ Cmp(tmp_ptr, expected);
    __ B(ne, GetExitLabel());

    // The old value we have read did not match `expected` (which is always a to-space reference)
    // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
    // to-space value matched, so the old value must be a from-space reference to the same object.
    // Do the same CAS loop as the main path but check for both `expected` and the unmarked
    // old value representing the to-space and from-space references for the same object.

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register adjusted_old_value = temps.Acquire();      // For saved `tmp` from main path.

    // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP.
    __ Add(tmp_ptr, base, offset);
    __ Mov(adjusted_old_value, tmp);

    // do {
    //   tmp = [r_ptr] - expected;
    // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value));
    // result = (tmp == 0 || tmp == adjusted_old_value);

    vixl32::Label loop_head;
    __ Bind(&loop_head);
    __ Ldrex(tmp, MemOperand(tmp_ptr));  // This can now load null stored by another thread.
    assembler->MaybeUnpoisonHeapReference(tmp);
    __ Subs(tmp, tmp, expected);         // Use SUBS to get non-zero value if both compares fail.
    {
      // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
      __ it(ne);
      __ cmp(ne, tmp, adjusted_old_value);
    }
    __ B(ne, GetExitLabel());
    assembler->MaybePoisonHeapReference(value);
    __ Strex(tmp, value, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(value);
    __ Cmp(tmp, 0);
    __ B(ne, &loop_head, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }
};

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, DataType::Type::kInt64);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register out = OutputRegister(invoke);                      // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.

  vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.

  vixl32::Label loop_exit_label;
  vixl32::Label* loop_exit = &loop_exit_label;
  vixl32::Label* failure = &loop_exit_label;

  if (type == DataType::Type::kReference) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // If marking, check if the stored reference is a from-space reference to the same
      // object as the to-space reference `expected`. If so, perform a custom CAS loop.
      BakerReadBarrierCasSlowPathARMVIXL* slow_path =
          new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
      codegen->AddSlowPath(slow_path);
      failure = slow_path->GetEntryLabel();
      loop_exit = slow_path->GetExitLabel();
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STREX
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp == 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);
  __ Ldrex(tmp, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(tmp);
  }
  __ Subs(tmp, tmp, expected);
  static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())->
      B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint);
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(value);
  }
  __ Strex(tmp, value, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(value);
  }
  __ Cmp(tmp, 0);
  __ B(ne, &loop_head, /* is_far_target= */ false);

  __ Bind(loop_exit);

  __ Dmb(vixl32::ISH);

  // out = tmp == 0.
  __ Clz(out, tmp);
  __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));

  if (type == DataType::Type::kReference) {
    codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128);
  }
}
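
// The boolean result above is produced without a branch: after the loop `tmp` holds
// [r_ptr] - expected, so CLZ(tmp) is 32 exactly when tmp == 0, and shifting right by
// log2(32) = 5 turns that into 1 on success and 0 on failure.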
1119 
VisitUnsafeCASInt(HInvoke * invoke)1120 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1121   CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
1122 }
VisitUnsafeCASObject(HInvoke * invoke)1123 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1124   // The only read barrier implementation supporting the
1125   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1126   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1127     return;
1128   }
1129 
1130   CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
1131 }
VisitUnsafeCASInt(HInvoke * invoke)1132 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1133   GenCas(invoke, DataType::Type::kInt32, codegen_);
1134 }
VisitUnsafeCASObject(HInvoke * invoke)1135 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1136   // The only read barrier implementation supporting the
1137   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1138   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1139 
1140   GenCas(invoke, DataType::Type::kReference, codegen_);
1141 }
1142 
1143 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1144   // The inputs plus temporaries.
1145   LocationSummary* locations =
1146       new (allocator_) LocationSummary(invoke,
1147                                        invoke->InputAt(1)->CanBeNull()
1148                                            ? LocationSummary::kCallOnSlowPath
1149                                            : LocationSummary::kNoCall,
1150                                        kIntrinsified);
1151   locations->SetInAt(0, Location::RequiresRegister());
1152   locations->SetInAt(1, Location::RequiresRegister());
1153   locations->AddTemp(Location::RequiresRegister());
1154   locations->AddTemp(Location::RequiresRegister());
1155   locations->AddTemp(Location::RequiresRegister());
1156   // Need an extra temporary register for the string compression feature.
1157   if (mirror::kUseStringCompression) {
1158     locations->AddTemp(Location::RequiresRegister());
1159   }
1160   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1161 }
1162 
1163 // Forward declaration.
1164 //
1165 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
1166 // by the compiler for every C++ function, and if this function gets inlined in
1167 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
1168 // build failure. That is the reason why NO_INLINE attribute is used.
1169 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1170                                                   HInvoke* invoke,
1171                                                   vixl32::Label* end,
1172                                                   vixl32::Label* different_compression);
1173 
1174 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1175   ArmVIXLAssembler* assembler = GetAssembler();
1176   LocationSummary* locations = invoke->GetLocations();
1177 
1178   const vixl32::Register str = InputRegisterAt(invoke, 0);
1179   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1180   const vixl32::Register out = OutputRegister(invoke);
1181 
1182   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1183   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1184   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1185   vixl32::Register temp3;
1186   if (mirror::kUseStringCompression) {
1187     temp3 = RegisterFrom(locations->GetTemp(3));
1188   }
1189 
1190   vixl32::Label end;
1191   vixl32::Label different_compression;
1192 
1193   // Get offsets of count and value fields within a string object.
1194   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1195 
1196   // Note that the null check must have been done earlier.
1197   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1198 
1199   // Take slow path and throw if input can be and is null.
1200   SlowPathCodeARMVIXL* slow_path = nullptr;
1201   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1202   if (can_slow_path) {
1203     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1204     codegen_->AddSlowPath(slow_path);
1205     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1206   }
1207 
1208   // Reference equality check, return 0 if same reference.
1209   __ Subs(out, str, arg);
1210   __ B(eq, &end);
1211 
1212   if (mirror::kUseStringCompression) {
1213     // Load `count` fields of this and argument strings.
1214     __ Ldr(temp3, MemOperand(str, count_offset));
1215     __ Ldr(temp2, MemOperand(arg, count_offset));
1216     // Extract lengths from the `count` fields.
1217     __ Lsr(temp0, temp3, 1u);
1218     __ Lsr(temp1, temp2, 1u);
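    // (With compression enabled, `count` packs (length << 1) | flag, where flag 0 means compressed
    // 8-bit chars and 1 means uncompressed 16-bit chars, as asserted on
    // mirror::StringCompressionFlag::kCompressed further down; LSR #1 therefore leaves the plain
    // character counts in temp0 and temp1.)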
1219   } else {
1220     // Load lengths of this and argument strings.
1221     __ Ldr(temp0, MemOperand(str, count_offset));
1222     __ Ldr(temp1, MemOperand(arg, count_offset));
1223   }
1224   // out = length diff.
1225   __ Subs(out, temp0, temp1);
1226   // temp0 = min(len(str), len(arg)).
1227 
1228   {
1229     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1230                            2 * kMaxInstructionSizeInBytes,
1231                            CodeBufferCheckScope::kMaximumSize);
1232 
1233     __ it(gt);
1234     __ mov(gt, temp0, temp1);
1235   }
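  // (The SUBS above set the flags for temp0 - temp1, so the predicated MOV copies temp1 into temp0
  // only when temp0 > temp1, leaving the smaller of the two lengths in temp0.)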
1236 
1237   // Shorter string is empty?
1238   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1239   // which moves the &end label far away from this branch and makes it not 'CBZ-encodable'.
1240   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1241 
1242   if (mirror::kUseStringCompression) {
1243     // Check that both strings use the same compression style before using this comparison loop.
1244     __ Eors(temp2, temp2, temp3);
1245     __ Lsrs(temp2, temp2, 1u);
1246     __ B(cs, &different_compression);
1247     // For string compression, calculate the number of bytes to compare (not chars).
1248     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1249     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
1250 
1251     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1252                            2 * kMaxInstructionSizeInBytes,
1253                            CodeBufferCheckScope::kMaximumSize);
1254 
1255     __ it(ne);
1256     __ add(ne, temp0, temp0, temp0);
1257   }
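  // (The LSLS result is non-zero exactly when the strings are uncompressed, so the predicated ADD
  // doubles temp0 from a char count to a byte count; compressed strings use one byte per char, so
  // temp0 is already the byte count.)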
1258 
1259 
1260   GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
1261 
1262   __ Bind(&end);
1263 
1264   if (can_slow_path) {
1265     __ Bind(slow_path->GetExitLabel());
1266   }
1267 }
1268 
1269 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1270                                                   HInvoke* invoke,
1271                                                   vixl32::Label* end,
1272                                                   vixl32::Label* different_compression) {
1273   LocationSummary* locations = invoke->GetLocations();
1274 
1275   const vixl32::Register str = InputRegisterAt(invoke, 0);
1276   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1277   const vixl32::Register out = OutputRegister(invoke);
1278 
1279   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1280   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1281   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1282   vixl32::Register temp3;
1283   if (mirror::kUseStringCompression) {
1284     temp3 = RegisterFrom(locations->GetTemp(3));
1285   }
1286 
1287   vixl32::Label loop;
1288   vixl32::Label find_char_diff;
1289 
1290   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1291   // Store offset of string value in preparation for comparison loop.
1292   __ Mov(temp1, value_offset);
1293 
1294   // Assertions that must hold in order to compare multiple characters at a time.
1295   CHECK_ALIGNED(value_offset, 8);
1296   static_assert(IsAligned<8>(kObjectAlignment),
1297                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1298 
1299   const unsigned char_size = DataType::Size(DataType::Type::kUint16);
1300   DCHECK_EQ(char_size, 2u);
1301 
1302   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1303 
1304   vixl32::Label find_char_diff_2nd_cmp;
1305   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1306   __ Bind(&loop);
1307   vixl32::Register temp_reg = temps.Acquire();
1308   __ Ldr(temp_reg, MemOperand(str, temp1));
1309   __ Ldr(temp2, MemOperand(arg, temp1));
1310   __ Cmp(temp_reg, temp2);
1311   __ B(ne, &find_char_diff, /* is_far_target= */ false);
1312   __ Add(temp1, temp1, char_size * 2);
1313 
1314   __ Ldr(temp_reg, MemOperand(str, temp1));
1315   __ Ldr(temp2, MemOperand(arg, temp1));
1316   __ Cmp(temp_reg, temp2);
1317   __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
1318   __ Add(temp1, temp1, char_size * 2);
1319   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1320   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1321   __ B(hi, &loop, /* is_far_target= */ false);
1322   __ B(end);
1323 
1324   __ Bind(&find_char_diff_2nd_cmp);
1325   if (mirror::kUseStringCompression) {
1326     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
1327     __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
1328   } else {
1329     // Without string compression, we can start treating temp0 as signed
1330     // and rely on the signed comparison below.
1331     __ Sub(temp0, temp0, 2);
1332   }
1333 
1334   // Find the single character difference.
1335   __ Bind(&find_char_diff);
1336   // Get the bit position of the first character that differs.
1337   __ Eor(temp1, temp2, temp_reg);
1338   __ Rbit(temp1, temp1);
1339   __ Clz(temp1, temp1);
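  // (EOR leaves a set bit at every position where the two words differ; RBIT + CLZ then computes
  // the index of the least significant set bit, i.e. the bit offset of the first mismatching
  // character within the last pair of words compared.)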
1340 
1341   // temp0 = number of characters remaining to compare.
1342   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1343   // in the comparison loop, and after the end of the shorter string data).
1344 
1345   // Without string compression, (temp1 >> 4) = the half-word in which the difference occurs,
1346   // within the last pair of words compared, in the interval [0,1]
1347   // (0 for the low half-word differing, 1 for the high half-word differing).
1348   // With string compression, (temp1 >> 3) = the byte in which the difference occurs,
1349   // in the interval [0,3].
1350 
1351   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1352   // the remaining string data, so just return length diff (out).
1353   // The comparison is unsigned for string compression, otherwise signed.
1354   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1355   __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
1356 
1357   // Extract the characters and calculate the difference.
1358   if (mirror::kUseStringCompression) {
1359     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1360     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1361     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1362     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7f800000u
1363     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1364     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
1365     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
1366     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
1367     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
1368     __ And(temp2, temp2, temp3);
1369     __ And(out, out, temp3);
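    // (Worked example of the constants above: for uncompressed strings temp3 goes
    // 0x80000000 -> 0xff800000 -> 0xffff0000 -> 0xffff and temp1 is masked to a multiple of 16,
    // so the shifts extract a half-word; for compressed strings temp3 goes
    // 0 -> 0x7f800000 -> 0x00ff0000 -> 0xff and temp1 is masked to a multiple of 8,
    // so the shifts extract a byte.)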
1370   } else {
1371     __ Bic(temp1, temp1, 0xf);
1372     __ Lsr(temp2, temp2, temp1);
1373     __ Lsr(out, temp_reg, temp1);
1374     __ Movt(temp2, 0);
1375     __ Movt(out, 0);
1376   }
1377 
1378   __ Sub(out, out, temp2);
1379   temps.Release(temp_reg);
1380 
1381   if (mirror::kUseStringCompression) {
1382     __ B(end);
1383     __ Bind(different_compression);
1384 
1385     // Comparison for different compression style.
1386     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1387     DCHECK_EQ(c_char_size, 1u);
1388 
1389     // We want to free up temp3, currently holding `str.count`, for comparison.
1390     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1391     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1392     // further down with an LSRS+SBC which will flip the meaning of the flag but allow
1393     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1394     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
1395     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1396     __ Mov(temp1, str);
1397     __ Mov(temp2, arg);
1398     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
1399     {
1400       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1401                              3 * kMaxInstructionSizeInBytes,
1402                              CodeBufferCheckScope::kMaximumSize);
1403       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
1404       __ mov(cs, temp1, arg);                 // Preserves flags.
1405       __ mov(cs, temp2, str);                 // Preserves flags.
1406     }
1407     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
1408 
1409     // Adjust temp1 and temp2 from string pointers to data pointers.
1410     __ Add(temp1, temp1, value_offset);
1411     __ Add(temp2, temp2, value_offset);
1412 
1413     vixl32::Label different_compression_loop;
1414     vixl32::Label different_compression_diff;
1415 
1416     // Main loop for different compression.
1417     temp_reg = temps.Acquire();
1418     __ Bind(&different_compression_loop);
1419     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1420     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1421     __ Cmp(temp_reg, temp3);
1422     __ B(ne, &different_compression_diff, /* is_far_target= */ false);
1423     __ Subs(temp0, temp0, 2);
1424     __ B(hi, &different_compression_loop, /* is_far_target= */ false);
1425     __ B(end);
1426 
1427     // Calculate the difference.
1428     __ Bind(&different_compression_diff);
1429     __ Sub(out, temp_reg, temp3);
1430     temps.Release(temp_reg);
1431     // Flip the difference if the `arg` is compressed.
1432     // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
1433     __ Lsrs(temp0, temp0, 1u);
1434     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1435                   "Expecting 0=compressed, 1=uncompressed");
1436 
1437     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1438                            2 * kMaxInstructionSizeInBytes,
1439                            CodeBufferCheckScope::kMaximumSize);
1440     __ it(cc);
1441     __ rsb(cc, out, out, 0);
1442   }
1443 }
1444 
1445 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1446 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1447 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1448 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1449 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
1450 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
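// (At this cutoff, 16 bytes of const-string data unroll into at most two
// LDRD+LDRD+CMP+BNE+CMP+BNE groups, i.e. 12 instructions, which is exactly the budget above.)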
1451 
1452 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1453   if (candidate->IsLoadString()) {
1454     HLoadString* load_string = candidate->AsLoadString();
1455     const DexFile& dex_file = load_string->GetDexFile();
1456     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1457   }
1458   return nullptr;
1459 }
1460 
1461 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1462   LocationSummary* locations =
1463       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1464   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1465   locations->SetInAt(0, Location::RequiresRegister());
1466   locations->SetInAt(1, Location::RequiresRegister());
1467 
1468   // Temporary registers to store lengths of strings and for calculations.
1469   // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
1470   locations->AddTemp(LocationFrom(r0));
1471 
1472   // For the generic implementation and for long const strings we need an extra temporary.
1473   // We do not need it for short const strings, up to 4 bytes, see code generation below.
1474   uint32_t const_string_length = 0u;
1475   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1476   if (const_string == nullptr) {
1477     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1478   }
1479   bool is_compressed =
1480       mirror::kUseStringCompression &&
1481       const_string != nullptr &&
1482       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1483   if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1484     locations->AddTemp(Location::RequiresRegister());
1485   }
1486 
1487   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1488   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1489   // Then we shall need an extra temporary register instead of the output register.
1490   locations->SetOut(Location::RequiresRegister());
1491 }
1492 
1493 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1494   ArmVIXLAssembler* assembler = GetAssembler();
1495   LocationSummary* locations = invoke->GetLocations();
1496 
1497   vixl32::Register str = InputRegisterAt(invoke, 0);
1498   vixl32::Register arg = InputRegisterAt(invoke, 1);
1499   vixl32::Register out = OutputRegister(invoke);
1500 
1501   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1502 
1503   vixl32::Label loop;
1504   vixl32::Label end;
1505   vixl32::Label return_true;
1506   vixl32::Label return_false;
1507   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1508 
1509   // Get offsets of count, value, and class fields within a string object.
1510   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1511   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1512   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1513 
1514   // Note that the null check must have been done earlier.
1515   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1516 
1517   StringEqualsOptimizations optimizations(invoke);
1518   if (!optimizations.GetArgumentNotNull()) {
1519     // Check if input is null, return false if it is.
1520     __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
1521   }
1522 
1523   // Reference equality check, return true if same reference.
1524   __ Cmp(str, arg);
1525   __ B(eq, &return_true, /* is_far_target= */ false);
1526 
1527   if (!optimizations.GetArgumentIsString()) {
1528     // Instanceof check for the argument by comparing class fields.
1529     // All string objects must have the same type since String cannot be subclassed.
1530     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1531     // If the argument is a string object, its class field must be equal to receiver's class field.
1532     //
1533     // As the String class is expected to be non-movable, we can read the class
1534     // field from String.equals' arguments without read barriers.
1535     AssertNonMovableStringClass();
1536     // /* HeapReference<Class> */ temp = str->klass_
1537     __ Ldr(temp, MemOperand(str, class_offset));
1538     // /* HeapReference<Class> */ out = arg->klass_
1539     __ Ldr(out, MemOperand(arg, class_offset));
1540     // Also, because we use the previously loaded class references only in the
1541     // following comparison, we don't need to unpoison them.
1542     __ Cmp(temp, out);
1543     __ B(ne, &return_false, /* is_far_target= */ false);
1544   }
1545 
1546   // Check if one of the inputs is a const string. Do not special-case both strings
1547   // being const; such cases should be handled by constant folding if needed.
1548   uint32_t const_string_length = 0u;
1549   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1550   if (const_string == nullptr) {
1551     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1552     if (const_string != nullptr) {
1553       std::swap(str, arg);  // Make sure the const string is in `str`.
1554     }
1555   }
1556   bool is_compressed =
1557       mirror::kUseStringCompression &&
1558       const_string != nullptr &&
1559       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1560 
1561   if (const_string != nullptr) {
1562     // Load `count` field of the argument string and check if it matches the const string.
1563     // This also compares the compression style; if it differs, return false.
1564     __ Ldr(temp, MemOperand(arg, count_offset));
1565     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1566     __ B(ne, &return_false, /* is_far_target= */ false);
1567   } else {
1568     // Load `count` fields of this and argument strings.
1569     __ Ldr(temp, MemOperand(str, count_offset));
1570     __ Ldr(out, MemOperand(arg, count_offset));
1571     // Check if `count` fields are equal, return false if they're not.
1572     // This also compares the compression style; if it differs, return false.
1573     __ Cmp(temp, out);
1574     __ B(ne, &return_false, /* is_far_target= */ false);
1575   }
1576 
1577   // Assertions that must hold in order to compare strings 4 bytes at a time.
1578   // Ok to do this because strings are zero-padded to kObjectAlignment.
1579   DCHECK_ALIGNED(value_offset, 4);
1580   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1581 
1582   if (const_string != nullptr &&
1583       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1584                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1585     // Load and compare the contents. Though we know the contents of the short const string
1586     // at compile time, materializing constants may be more code than loading from memory.
1587     int32_t offset = value_offset;
1588     size_t remaining_bytes =
1589         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
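    // (For example, a compressed 3-character const string rounds up to 4 bytes here, so the LDRD
    // loop below is skipped entirely and the single-word tail comparison handles it.)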
1590     while (remaining_bytes > sizeof(uint32_t)) {
1591       vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1592       UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1593       vixl32::Register temp2 = scratch_scope.Acquire();
1594       __ Ldrd(temp, temp1, MemOperand(str, offset));
1595       __ Ldrd(temp2, out, MemOperand(arg, offset));
1596       __ Cmp(temp, temp2);
1597       __ B(ne, &return_false, /* is_far_target= */ false);
1598       __ Cmp(temp1, out);
1599       __ B(ne, &return_false, /* is_far_target= */ false);
1600       offset += 2u * sizeof(uint32_t);
1601       remaining_bytes -= 2u * sizeof(uint32_t);
1602     }
1603     if (remaining_bytes != 0u) {
1604       __ Ldr(temp, MemOperand(str, offset));
1605       __ Ldr(out, MemOperand(arg, offset));
1606       __ Cmp(temp, out);
1607       __ B(ne, &return_false, /* is_far_target= */ false);
1608     }
1609   } else {
1610     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1611     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1612                   "Expecting 0=compressed, 1=uncompressed");
1613     __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1614 
1615     if (mirror::kUseStringCompression) {
1616       // For string compression, calculate the number of bytes to compare (not chars).
1617       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1618       __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1619       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1620                              2 * kMaxInstructionSizeInBytes,
1621                              CodeBufferCheckScope::kMaximumSize);
1622       __ it(cs);                                      // If uncompressed,
1623       __ add(cs, temp, temp, temp);                   //   double the byte count.
1624     }
1625 
1626     vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1627     UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1628     vixl32::Register temp2 = scratch_scope.Acquire();
1629 
1630     // Store offset of string value in preparation for comparison loop.
1631     __ Mov(temp1, value_offset);
1632 
1633     // Loop to compare strings 4 bytes at a time starting at the front of the string.
1634     __ Bind(&loop);
1635     __ Ldr(out, MemOperand(str, temp1));
1636     __ Ldr(temp2, MemOperand(arg, temp1));
1637     __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1638     __ Cmp(out, temp2);
1639     __ B(ne, &return_false, /* is_far_target= */ false);
1640     // With string compression, we have compared 4 bytes, otherwise 2 chars.
1641     __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1642     __ B(hi, &loop, /* is_far_target= */ false);
1643   }
1644 
1645   // Return true and exit the function.
1646   // If loop does not result in returning false, we return true.
1647   __ Bind(&return_true);
1648   __ Mov(out, 1);
1649   __ B(final_label);
1650 
1651   // Return false and exit the function.
1652   __ Bind(&return_false);
1653   __ Mov(out, 0);
1654 
1655   if (end.IsReferenced()) {
1656     __ Bind(&end);
1657   }
1658 }
1659 
1660 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1661                                        ArmVIXLAssembler* assembler,
1662                                        CodeGeneratorARMVIXL* codegen,
1663                                        bool start_at_zero) {
1664   LocationSummary* locations = invoke->GetLocations();
1665 
1666   // Note that the null check must have been done earlier.
1667   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1668 
1669   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1670   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1671   SlowPathCodeARMVIXL* slow_path = nullptr;
1672   HInstruction* code_point = invoke->InputAt(1);
1673   if (code_point->IsIntConstant()) {
1674     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1675         std::numeric_limits<uint16_t>::max()) {
1676       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1677       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1678       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1679       codegen->AddSlowPath(slow_path);
1680       __ B(slow_path->GetEntryLabel());
1681       __ Bind(slow_path->GetExitLabel());
1682       return;
1683     }
1684   } else if (code_point->GetType() != DataType::Type::kUint16) {
1685     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1686     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1687     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
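    // (0x10000 is a single bit and thus encodable as a modified immediate, while 0xffff would need
    // 16 contiguous set bits; hence the HS comparison against 0x10000 above.)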
1688     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1689     codegen->AddSlowPath(slow_path);
1690     __ B(hs, slow_path->GetEntryLabel());
1691   }
1692 
1693   if (start_at_zero) {
1694     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1695     DCHECK(tmp_reg.Is(r2));
1696     // Start-index = 0.
1697     __ Mov(tmp_reg, 0);
1698   }
1699 
1700   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1701   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1702 
1703   if (slow_path != nullptr) {
1704     __ Bind(slow_path->GetExitLabel());
1705   }
1706 }
1707 
1708 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1709   LocationSummary* locations = new (allocator_) LocationSummary(
1710       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1711   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1712   // best to align the inputs accordingly.
1713   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1714   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1715   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1716   locations->SetOut(LocationFrom(r0));
1717 
1718   // Need to send start-index=0.
1719   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1720 }
1721 
1722 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1723   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1724 }
1725 
1726 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1727   LocationSummary* locations = new (allocator_) LocationSummary(
1728       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1729   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1730   // best to align the inputs accordingly.
1731   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1732   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1733   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1734   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1735   locations->SetOut(LocationFrom(r0));
1736 }
1737 
1738 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1739   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1740 }
1741 
1742 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1743   LocationSummary* locations = new (allocator_) LocationSummary(
1744       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1745   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1746   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1747   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1748   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1749   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1750   locations->SetOut(LocationFrom(r0));
1751 }
1752 
1753 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1754   ArmVIXLAssembler* assembler = GetAssembler();
1755   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1756   __ Cmp(byte_array, 0);
1757   SlowPathCodeARMVIXL* slow_path =
1758       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1759   codegen_->AddSlowPath(slow_path);
1760   __ B(eq, slow_path->GetEntryLabel());
1761 
1762   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1763   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1764   __ Bind(slow_path->GetExitLabel());
1765 }
1766 
1767 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1768   LocationSummary* locations =
1769       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1770   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1771   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1772   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1773   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1774   locations->SetOut(LocationFrom(r0));
1775 }
1776 
1777 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1778   // No need to emit code checking whether `locations->InAt(2)` is a null
1779   // pointer, as callers of the native method
1780   //
1781   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1782   //
1783   // all include a null check on `data` before calling that method.
1784   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1785   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1786 }
1787 
1788 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1789   LocationSummary* locations = new (allocator_) LocationSummary(
1790       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1791   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1792   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1793   locations->SetOut(LocationFrom(r0));
1794 }
1795 
1796 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1797   ArmVIXLAssembler* assembler = GetAssembler();
1798   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1799   __ Cmp(string_to_copy, 0);
1800   SlowPathCodeARMVIXL* slow_path =
1801       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1802   codegen_->AddSlowPath(slow_path);
1803   __ B(eq, slow_path->GetEntryLabel());
1804 
1805   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1806   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1807 
1808   __ Bind(slow_path->GetExitLabel());
1809 }
1810 
1811 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1812   // The only read barrier implementation supporting the
1813   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1814   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1815     return;
1816   }
1817 
1818   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1819   LocationSummary* locations = invoke->GetLocations();
1820   if (locations == nullptr) {
1821     return;
1822   }
1823 
1824   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1825   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1826   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1827 
1828   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1829     locations->SetInAt(1, Location::RequiresRegister());
1830   }
1831   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1832     locations->SetInAt(3, Location::RequiresRegister());
1833   }
1834   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1835     locations->SetInAt(4, Location::RequiresRegister());
1836   }
1837   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1838     // Temporary register IP cannot be used in
1839     // ReadBarrierSystemArrayCopySlowPathARM (because that register
1840     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1841     // temporary register from the register allocator.
1842     locations->AddTemp(Location::RequiresRegister());
1843   }
1844 }
1845 
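// Verifies that [pos, pos + length) fits within `input` (and that a non-constant `pos` is
// non-negative), branching to `slow_path` otherwise; when `length_is_input_length` is set, a
// non-constant `pos` may only be zero.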
1846 static void CheckPosition(ArmVIXLAssembler* assembler,
1847                           Location pos,
1848                           vixl32::Register input,
1849                           Location length,
1850                           SlowPathCodeARMVIXL* slow_path,
1851                           vixl32::Register temp,
1852                           bool length_is_input_length = false) {
1853   // Where is the length in the Array?
1854   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1855 
1856   if (pos.IsConstant()) {
1857     int32_t pos_const = Int32ConstantFrom(pos);
1858     if (pos_const == 0) {
1859       if (!length_is_input_length) {
1860         // Check that length(input) >= length.
1861         __ Ldr(temp, MemOperand(input, length_offset));
1862         if (length.IsConstant()) {
1863           __ Cmp(temp, Int32ConstantFrom(length));
1864         } else {
1865           __ Cmp(temp, RegisterFrom(length));
1866         }
1867         __ B(lt, slow_path->GetEntryLabel());
1868       }
1869     } else {
1870       // Check that length(input) >= pos.
1871       __ Ldr(temp, MemOperand(input, length_offset));
1872       __ Subs(temp, temp, pos_const);
1873       __ B(lt, slow_path->GetEntryLabel());
1874 
1875       // Check that (length(input) - pos) >= length.
1876       if (length.IsConstant()) {
1877         __ Cmp(temp, Int32ConstantFrom(length));
1878       } else {
1879         __ Cmp(temp, RegisterFrom(length));
1880       }
1881       __ B(lt, slow_path->GetEntryLabel());
1882     }
1883   } else if (length_is_input_length) {
1884     // The only way the copy can succeed is if pos is zero.
1885     vixl32::Register pos_reg = RegisterFrom(pos);
1886     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1887   } else {
1888     // Check that pos >= 0.
1889     vixl32::Register pos_reg = RegisterFrom(pos);
1890     __ Cmp(pos_reg, 0);
1891     __ B(lt, slow_path->GetEntryLabel());
1892 
1893     // Check that pos <= length(input).
1894     __ Ldr(temp, MemOperand(input, length_offset));
1895     __ Subs(temp, temp, pos_reg);
1896     __ B(lt, slow_path->GetEntryLabel());
1897 
1898     // Check that (length(input) - pos) >= length.
1899     if (length.IsConstant()) {
1900       __ Cmp(temp, Int32ConstantFrom(length));
1901     } else {
1902       __ Cmp(temp, RegisterFrom(length));
1903     }
1904     __ B(lt, slow_path->GetEntryLabel());
1905   }
1906 }
1907 
1908 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1909   // The only read barrier implementation supporting the
1910   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1911   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1912 
1913   ArmVIXLAssembler* assembler = GetAssembler();
1914   LocationSummary* locations = invoke->GetLocations();
1915 
1916   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1917   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1918   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1919   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1920   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1921 
1922   vixl32::Register src = InputRegisterAt(invoke, 0);
1923   Location src_pos = locations->InAt(1);
1924   vixl32::Register dest = InputRegisterAt(invoke, 2);
1925   Location dest_pos = locations->InAt(3);
1926   Location length = locations->InAt(4);
1927   Location temp1_loc = locations->GetTemp(0);
1928   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1929   Location temp2_loc = locations->GetTemp(1);
1930   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1931   Location temp3_loc = locations->GetTemp(2);
1932   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1933 
1934   SlowPathCodeARMVIXL* intrinsic_slow_path =
1935       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1936   codegen_->AddSlowPath(intrinsic_slow_path);
1937 
1938   vixl32::Label conditions_on_positions_validated;
1939   SystemArrayCopyOptimizations optimizations(invoke);
1940 
1941   // If source and destination are the same, we go to the slow path when the regions overlap
1942   // in a way that the forward copy below cannot handle, i.e. when src_pos < dest_pos.
1943   if (src_pos.IsConstant()) {
1944     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1945     if (dest_pos.IsConstant()) {
1946       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1947       if (optimizations.GetDestinationIsSource()) {
1948         // Checked when building locations.
1949         DCHECK_GE(src_pos_constant, dest_pos_constant);
1950       } else if (src_pos_constant < dest_pos_constant) {
1951         __ Cmp(src, dest);
1952         __ B(eq, intrinsic_slow_path->GetEntryLabel());
1953       }
1954 
1955       // Checked when building locations.
1956       DCHECK(!optimizations.GetDestinationIsSource()
1957              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1958     } else {
1959       if (!optimizations.GetDestinationIsSource()) {
1960         __ Cmp(src, dest);
1961         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1962       }
1963       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1964       __ B(gt, intrinsic_slow_path->GetEntryLabel());
1965     }
1966   } else {
1967     if (!optimizations.GetDestinationIsSource()) {
1968       __ Cmp(src, dest);
1969       __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1970     }
1971     if (dest_pos.IsConstant()) {
1972       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1973       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1974     } else {
1975       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1976     }
1977     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1978   }
1979 
1980   __ Bind(&conditions_on_positions_validated);
1981 
1982   if (!optimizations.GetSourceIsNotNull()) {
1983     // Bail out if the source is null.
1984     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1985   }
1986 
1987   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1988     // Bail out if the destination is null.
1989     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1990   }
1991 
1992   // If the length is negative, bail out.
1993   // We have already checked in the LocationsBuilder for the constant case.
1994   if (!length.IsConstant() &&
1995       !optimizations.GetCountIsSourceLength() &&
1996       !optimizations.GetCountIsDestinationLength()) {
1997     __ Cmp(RegisterFrom(length), 0);
1998     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1999   }
2000 
2001   // Validity checks: source.
2002   CheckPosition(assembler,
2003                 src_pos,
2004                 src,
2005                 length,
2006                 intrinsic_slow_path,
2007                 temp1,
2008                 optimizations.GetCountIsSourceLength());
2009 
2010   // Validity checks: dest.
2011   CheckPosition(assembler,
2012                 dest_pos,
2013                 dest,
2014                 length,
2015                 intrinsic_slow_path,
2016                 temp1,
2017                 optimizations.GetCountIsDestinationLength());
2018 
2019   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2020     // Check whether all elements of the source array are assignable to the component
2021     // type of the destination array. We do two checks: the classes are the same,
2022     // or the destination is Object[]. If none of these checks succeed, we go to the
2023     // slow path.
2024 
2025     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2026       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2027         // /* HeapReference<Class> */ temp1 = src->klass_
2028         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2029             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
2030         // Bail out if the source is not a non-primitive array.
2031         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2032         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2033             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2034         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2035         // If heap poisoning is enabled, `temp1` has been unpoisoned
2036         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2037         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2038         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2039         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2040         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2041       }
2042 
2043       // /* HeapReference<Class> */ temp1 = dest->klass_
2044       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2045           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
2046 
2047       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2048         // Bail out if the destination is not a non-primitive array.
2049         //
2050         // Register `temp1` is not trashed by the read barrier emitted
2051         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2052         // method produces a call to a ReadBarrierMarkRegX entry point,
2053         // which saves all potentially live registers, including
2054         // temporaries such as `temp1`.
2055         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2056         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2057             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
2058         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2059         // If heap poisoning is enabled, `temp2` has been unpoisoned
2060         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2061         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2062         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2063         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2064         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2065       }
2066 
2067       // For the same reason given earlier, `temp1` is not trashed by the
2068       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2069       // /* HeapReference<Class> */ temp2 = src->klass_
2070       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2071           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
2072       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2073       __ Cmp(temp1, temp2);
2074 
2075       if (optimizations.GetDestinationIsTypedObjectArray()) {
2076         vixl32::Label do_copy;
2077         __ B(eq, &do_copy, /* is_far_target= */ false);
2078         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2079         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2080             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2081         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2082         // We do not need to emit a read barrier for the following
2083         // heap reference load, as `temp1` is only used in a
2084         // comparison with null below, and this reference is not
2085         // kept afterwards.
2086         __ Ldr(temp1, MemOperand(temp1, super_offset));
2087         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2088         __ Bind(&do_copy);
2089       } else {
2090         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2091       }
2092     } else {
2093       // Non read barrier code.
2094 
2095       // /* HeapReference<Class> */ temp1 = dest->klass_
2096       __ Ldr(temp1, MemOperand(dest, class_offset));
2097       // /* HeapReference<Class> */ temp2 = src->klass_
2098       __ Ldr(temp2, MemOperand(src, class_offset));
2099       bool did_unpoison = false;
2100       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2101           !optimizations.GetSourceIsNonPrimitiveArray()) {
2102         // One or two of the references need to be unpoisoned. Unpoison them
2103         // both to make the identity check valid.
2104         assembler->MaybeUnpoisonHeapReference(temp1);
2105         assembler->MaybeUnpoisonHeapReference(temp2);
2106         did_unpoison = true;
2107       }
2108 
2109       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2110         // Bail out if the destination is not a non-primitive array.
2111         // /* HeapReference<Class> */ temp3 = temp1->component_type_
2112         __ Ldr(temp3, MemOperand(temp1, component_offset));
2113         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2114         assembler->MaybeUnpoisonHeapReference(temp3);
2115         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2116         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2117         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2118         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2119       }
2120 
2121       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2122         // Bail out if the source is not a non-primitive array.
2123         // /* HeapReference<Class> */ temp3 = temp2->component_type_
2124         __ Ldr(temp3, MemOperand(temp2, component_offset));
2125         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2126         assembler->MaybeUnpoisonHeapReference(temp3);
2127         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2128         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2129         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2130         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2131       }
2132 
2133       __ Cmp(temp1, temp2);
2134 
2135       if (optimizations.GetDestinationIsTypedObjectArray()) {
2136         vixl32::Label do_copy;
2137         __ B(eq, &do_copy, /* is_far_target= */ false);
2138         if (!did_unpoison) {
2139           assembler->MaybeUnpoisonHeapReference(temp1);
2140         }
2141         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2142         __ Ldr(temp1, MemOperand(temp1, component_offset));
2143         assembler->MaybeUnpoisonHeapReference(temp1);
2144         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2145         __ Ldr(temp1, MemOperand(temp1, super_offset));
2146         // No need to unpoison the result, we're comparing against null.
2147         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2148         __ Bind(&do_copy);
2149       } else {
2150         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2151       }
2152     }
2153   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2154     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2155     // Bail out if the source is not a non-primitive array.
2156     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2157       // /* HeapReference<Class> */ temp1 = src->klass_
2158       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2159           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
2160       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2161       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2162           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2163       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2164       // If heap poisoning is enabled, `temp3` has been unpoisoned
2165       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2166     } else {
2167       // /* HeapReference<Class> */ temp1 = src->klass_
2168       __ Ldr(temp1, MemOperand(src, class_offset));
2169       assembler->MaybeUnpoisonHeapReference(temp1);
2170       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2171       __ Ldr(temp3, MemOperand(temp1, component_offset));
2172       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2173       assembler->MaybeUnpoisonHeapReference(temp3);
2174     }
2175     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2176     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2177     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2178     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2179   }
2180 
2181   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2182     // Constant length of zero: no need to emit the loop code at all.
2183   } else {
2184     vixl32::Label done;
2185     const DataType::Type type = DataType::Type::kReference;
2186     const int32_t element_size = DataType::Size(type);
2187 
2188     if (length.IsRegister()) {
2189       // Don't enter the copy loop if the length is zero.
2190       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
2191     }
2192 
2193     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2194       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2195 
2196       // SystemArrayCopy implementation for Baker read barriers (see
2197       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2198       //
2199       //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2200       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2201       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2202       //   if (is_gray) {
2203       //     // Slow-path copy.
2204       //     do {
2205       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2206       //     } while (src_ptr != end_ptr)
2207       //   } else {
2208       //     // Fast-path copy.
2209       //     do {
2210       //       *dest_ptr++ = *src_ptr++;
2211       //     } while (src_ptr != end_ptr)
2212       //   }
2213 
2214       // /* int32_t */ monitor = src->monitor_
2215       __ Ldr(temp2, MemOperand(src, monitor_offset));
2216       // /* LockWord */ lock_word = LockWord(monitor)
2217       static_assert(sizeof(LockWord) == sizeof(int32_t),
2218                     "art::LockWord and int32_t have different sizes.");
2219 
2220       // Introduce a dependency on the lock_word including the rb_state,
2221       // which shall prevent load-load reordering without using
2222       // a memory barrier (which would be more expensive).
2223       // `src` is unchanged by this operation, but its value now depends
2224       // on `temp2`.
2225       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
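      // (A logical shift right by 32 produces 0 on AArch32, so this ADD adds zero: `src` keeps its
      // value but now has a data dependency on the lock word just loaded into `temp2`.)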
2226 
2227       // Compute the base source address in `temp1`.
2228       // Note that `temp1` (the base source address) is computed from
2229       // `src` (and `src_pos`) here, and thus honors the artificial
2230       // dependency of `src` on `temp2`.
2231       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2232       // Compute the end source address in `temp3`.
2233       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2234       // The base destination address is computed later, as `temp2` is
2235       // used for intermediate computations.
2236 
2237       // Slow path used to copy array when `src` is gray.
2238       // Note that the base destination address is computed in `temp2`
2239       // by the slow path code.
2240       SlowPathCodeARMVIXL* read_barrier_slow_path =
2241           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2242       codegen_->AddSlowPath(read_barrier_slow_path);
2243 
2244       // Given the numeric representation, it's enough to check the low bit of the
2245       // rb_state. We do that by shifting the bit out of the lock word with LSRS
2246       // which can be a 16-bit instruction unlike the TST immediate.
2247       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2248       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2249       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2250       // Carry flag is the last bit shifted out by LSRS.
2251       __ B(cs, read_barrier_slow_path->GetEntryLabel());
2252 
2253       // Fast-path copy.
2254       // Compute the base destination address in `temp2`.
2255       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2256       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2257       // poison/unpoison.
2258       vixl32::Label loop;
2259       __ Bind(&loop);
2260       {
2261         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2262         const vixl32::Register temp_reg = temps.Acquire();
2263         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2264         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2265       }
2266       __ Cmp(temp1, temp3);
2267       __ B(ne, &loop, /* is_far_target= */ false);
2268 
2269       __ Bind(read_barrier_slow_path->GetExitLabel());
2270     } else {
2271       // Non read barrier code.
2272       // Compute the base source address in `temp1`.
2273       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2274       // Compute the base destination address in `temp2`.
2275       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2276       // Compute the end source address in `temp3`.
2277       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2278       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2279       // poison/unpoison.
2280       vixl32::Label loop;
2281       __ Bind(&loop);
2282       {
2283         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2284         const vixl32::Register temp_reg = temps.Acquire();
2285         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2286         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2287       }
2288       __ Cmp(temp1, temp3);
2289       __ B(ne, &loop, /* is_far_target= */ false);
2290     }
2291     __ Bind(&done);
2292   }
2293 
2294   // We only need one card marking on the destination array.
2295   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false);
2296 
2297   __ Bind(intrinsic_slow_path->GetExitLabel());
2298 }
2299 
2300 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2301   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2302   // the code generator. Furthermore, the register allocator creates fixed live intervals
2303   // for all caller-saved registers because we are doing a function call. As a result, if
2304   // the input and output locations are unallocated, the register allocator runs out of
2305   // registers and fails; however, a debuggable graph is not the common case.
2306   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2307     return;
2308   }
2309 
2310   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2311   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2312   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2313 
2314   LocationSummary* const locations =
2315       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2316   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2317 
2318   locations->SetInAt(0, Location::RequiresFpuRegister());
2319   locations->SetOut(Location::RequiresFpuRegister());
2320   // Native code uses the soft float ABI.
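  // A kFloat64 argument is therefore passed in a core register pair (the first two
  // runtime calling-convention registers, typically r0/r1), hence the two temps below.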
2321   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2322   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2323 }
2324 
2325 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2326   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2327   // the code generator. Furthermore, the register allocator creates fixed live intervals
2328   // for all caller-saved registers because we are doing a function call. As a result, if
2329   // the input and output locations are unallocated, the register allocator runs out of
2330   // registers and fails; however, a debuggable graph is not the common case.
2331   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2332     return;
2333   }
2334 
2335   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2336   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2337   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2338   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2339 
2340   LocationSummary* const locations =
2341       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2342   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2343 
2344   locations->SetInAt(0, Location::RequiresFpuRegister());
2345   locations->SetInAt(1, Location::RequiresFpuRegister());
2346   locations->SetOut(Location::RequiresFpuRegister());
2347   // Native code uses the soft float ABI.
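  // Two kFloat64 arguments occupy the first four runtime calling-convention registers
  // (typically r0-r3), hence the four core-register temps below.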
2348   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2349   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2350   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2351   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2352 }
2353 
2354 static void GenFPToFPCall(HInvoke* invoke,
2355                           ArmVIXLAssembler* assembler,
2356                           CodeGeneratorARMVIXL* codegen,
2357                           QuickEntrypointEnum entry) {
2358   LocationSummary* const locations = invoke->GetLocations();
2359 
2360   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2361   DCHECK(locations->WillCall() && locations->Intrinsified());
2362 
2363   // Native code uses the soft float ABI.
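  // The sequence is roughly:
  //   (temp0, temp1) = raw bits of the double input    (VMOV core pair <- D register)
  //   call the quick entrypoint, which takes and returns the value in core registers
  //   output D register = raw bits of (temp0, temp1)   (VMOV D register <- core pair)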
2364   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2365           RegisterFrom(locations->GetTemp(1)),
2366           InputDRegisterAt(invoke, 0));
2367   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2368   __ Vmov(OutputDRegister(invoke),
2369           RegisterFrom(locations->GetTemp(0)),
2370           RegisterFrom(locations->GetTemp(1)));
2371 }
2372 
2373 static void GenFPFPToFPCall(HInvoke* invoke,
2374                             ArmVIXLAssembler* assembler,
2375                             CodeGeneratorARMVIXL* codegen,
2376                             QuickEntrypointEnum entry) {
2377   LocationSummary* const locations = invoke->GetLocations();
2378 
2379   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2380   DCHECK(locations->WillCall() && locations->Intrinsified());
2381 
2382   // Native code uses the soft float ABI.
2383   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2384           RegisterFrom(locations->GetTemp(1)),
2385           InputDRegisterAt(invoke, 0));
2386   __ Vmov(RegisterFrom(locations->GetTemp(2)),
2387           RegisterFrom(locations->GetTemp(3)),
2388           InputDRegisterAt(invoke, 1));
2389   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2390   __ Vmov(OutputDRegister(invoke),
2391           RegisterFrom(locations->GetTemp(0)),
2392           RegisterFrom(locations->GetTemp(1)));
2393 }
2394 
2395 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2396   CreateFPToFPCallLocations(allocator_, invoke);
2397 }
2398 
2399 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2400   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2401 }
2402 
2403 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2404   CreateFPToFPCallLocations(allocator_, invoke);
2405 }
2406 
2407 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2408   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2409 }
2410 
2411 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2412   CreateFPToFPCallLocations(allocator_, invoke);
2413 }
2414 
2415 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2416   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2417 }
2418 
2419 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2420   CreateFPToFPCallLocations(allocator_, invoke);
2421 }
2422 
2423 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2424   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2425 }
2426 
2427 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2428   CreateFPToFPCallLocations(allocator_, invoke);
2429 }
2430 
2431 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2432   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2433 }
2434 
2435 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2436   CreateFPToFPCallLocations(allocator_, invoke);
2437 }
2438 
2439 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2440   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2441 }
2442 
2443 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2444   CreateFPToFPCallLocations(allocator_, invoke);
2445 }
2446 
2447 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2448   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2449 }
2450 
2451 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2452   CreateFPToFPCallLocations(allocator_, invoke);
2453 }
2454 
2455 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2456   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2457 }
2458 
2459 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2460   CreateFPToFPCallLocations(allocator_, invoke);
2461 }
2462 
2463 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2464   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2465 }
2466 
2467 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2468   CreateFPToFPCallLocations(allocator_, invoke);
2469 }
2470 
2471 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2472   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2473 }
2474 
2475 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2476   CreateFPToFPCallLocations(allocator_, invoke);
2477 }
2478 
2479 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2480   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2481 }
2482 
2483 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2484   CreateFPToFPCallLocations(allocator_, invoke);
2485 }
2486 
2487 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2488   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2489 }
2490 
2491 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2492   CreateFPToFPCallLocations(allocator_, invoke);
2493 }
2494 
2495 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2496   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2497 }
2498 
2499 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2500   CreateFPToFPCallLocations(allocator_, invoke);
2501 }
2502 
2503 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2504   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2505 }
2506 
2507 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2508   CreateFPFPToFPCallLocations(allocator_, invoke);
2509 }
2510 
2511 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2512   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2513 }
2514 
2515 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
2516   CreateFPFPToFPCallLocations(allocator_, invoke);
2517 }
2518 
2519 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
2520   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
2521 }
2522 
2523 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2524   CreateFPFPToFPCallLocations(allocator_, invoke);
2525 }
2526 
2527 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2528   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2529 }
2530 
2531 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2532   CreateFPFPToFPCallLocations(allocator_, invoke);
2533 }
2534 
2535 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2536   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2537 }
2538 
2539 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2540   CreateIntToIntLocations(allocator_, invoke);
2541 }
2542 
2543 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2544   ArmVIXLAssembler* assembler = GetAssembler();
2545   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2546 }
2547 
2548 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2549   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2550 }
2551 
2552 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2553   ArmVIXLAssembler* assembler = GetAssembler();
2554   LocationSummary* locations = invoke->GetLocations();
2555 
2556   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2557   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2558   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2559   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2560 
2561   __ Rbit(out_reg_lo, in_reg_hi);
2562   __ Rbit(out_reg_hi, in_reg_lo);
2563 }
2564 
2565 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2566   CreateIntToIntLocations(allocator_, invoke);
2567 }
2568 
2569 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2570   ArmVIXLAssembler* assembler = GetAssembler();
2571   __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2572 }
2573 
2574 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2575   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2576 }
2577 
2578 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2579   ArmVIXLAssembler* assembler = GetAssembler();
2580   LocationSummary* locations = invoke->GetLocations();
2581 
2582   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2583   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2584   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2585   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2586 
2587   __ Rev(out_reg_lo, in_reg_hi);
2588   __ Rev(out_reg_hi, in_reg_lo);
2589 }
2590 
2591 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2592   CreateIntToIntLocations(allocator_, invoke);
2593 }
2594 
2595 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2596   ArmVIXLAssembler* assembler = GetAssembler();
2597   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2598 }
2599 
2600 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2601   DCHECK(DataType::IsIntOrLongType(type)) << type;
2602   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2603   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2604 
2605   bool is_long = type == DataType::Type::kInt64;
2606   LocationSummary* locations = instr->GetLocations();
2607   Location in = locations->InAt(0);
2608   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2609   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2610   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2611   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2612   vixl32::Register  out_r = OutputRegister(instr);
2613 
2614   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2615   // According to the Cortex-A57 and Cortex-A72 optimization guides, moving data from a core
2616   // register into the upper or lower half of a VFP D-register incurs extra latency compared to
2617   // writing the full D-register; hence 'vmov d0, r0, r0' instead of 'vmov d0[0], r0' for integer bit count.
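  // The reduction below is therefore:
  //   VCNT.8     per-byte population counts
  //   VPADDL.U8 / VPADDL.U16 (/ VPADDL.U32 for long)  pairwise widening adds
  // after which the low S register of the temp D-register holds the final count.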
2618   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2619   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2620   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2621   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2622   if (is_long) {
2623     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2624   }
2625   __ Vmov(out_r, tmp_s);
2626 }
2627 
2628 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2629   CreateIntToIntLocations(allocator_, invoke);
2630   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2631 }
2632 
2633 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2634   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2635 }
2636 
2637 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2638   VisitIntegerBitCount(invoke);
2639 }
2640 
2641 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2642   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2643 }
2644 
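// GenHighestOneBit computes Integer/Long.highestOneBit as 0x80000000u >> CLZ(x):
// CLZ(0) is 32 and a register-specified LSR by 32 yields 0, so x == 0 maps to 0.
// For the 64-bit case both halves are computed and the low-half result is discarded
// whenever the high half is non-zero.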
2645 static void GenHighestOneBit(HInvoke* invoke,
2646                              DataType::Type type,
2647                              CodeGeneratorARMVIXL* codegen) {
2648   DCHECK(DataType::IsIntOrLongType(type));
2649 
2650   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2651   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2652   const vixl32::Register temp = temps.Acquire();
2653 
2654   if (type == DataType::Type::kInt64) {
2655     LocationSummary* locations = invoke->GetLocations();
2656     Location in = locations->InAt(0);
2657     Location out = locations->Out();
2658 
2659     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2660     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2661     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2662     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2663 
2664     __ Mov(temp, 0x80000000);  // Modified immediate.
2665     __ Clz(out_reg_lo, in_reg_lo);
2666     __ Clz(out_reg_hi, in_reg_hi);
2667     __ Lsr(out_reg_lo, temp, out_reg_lo);
2668     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2669 
2670     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2671     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2672     // we check that the output is in a low register, so that a 16-bit MOV
2673     // encoding can be used. If output is in a high register, then we generate
2674     // 4 more bytes of code to avoid a branch.
2675     Operand mov_src(0);
2676     if (!out_reg_lo.IsLow()) {
2677       __ Mov(LeaveFlags, temp, 0);
2678       mov_src = Operand(temp);
2679     }
2680     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2681                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2682                                   CodeBufferCheckScope::kExactSize);
2683     __ it(ne);
2684     __ mov(ne, out_reg_lo, mov_src);
2685   } else {
2686     vixl32::Register out = OutputRegister(invoke);
2687     vixl32::Register in = InputRegisterAt(invoke, 0);
2688 
2689     __ Mov(temp, 0x80000000);  // Modified immediate.
2690     __ Clz(out, in);
2691     __ Lsr(out, temp, out);
2692   }
2693 }
2694 
2695 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2696   CreateIntToIntLocations(allocator_, invoke);
2697 }
2698 
2699 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2700   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2701 }
2702 
2703 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2704   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2705 }
2706 
2707 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2708   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2709 }
2710 
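// GenLowestOneBit computes Integer/Long.lowestOneBit as x & -x. For the 64-bit case
// each half is negated independently; the missing borrow into the high half only
// matters when the low half is non-zero, and in that case the high-half result is
// discarded below anyway.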
2711 static void GenLowestOneBit(HInvoke* invoke,
2712                             DataType::Type type,
2713                             CodeGeneratorARMVIXL* codegen) {
2714   DCHECK(DataType::IsIntOrLongType(type));
2715 
2716   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2717   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2718   const vixl32::Register temp = temps.Acquire();
2719 
2720   if (type == DataType::Type::kInt64) {
2721     LocationSummary* locations = invoke->GetLocations();
2722     Location in = locations->InAt(0);
2723     Location out = locations->Out();
2724 
2725     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2726     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2727     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2728     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2729 
2730     __ Rsb(out_reg_hi, in_reg_hi, 0);
2731     __ Rsb(out_reg_lo, in_reg_lo, 0);
2732     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2733     // The result of this operation is 0 iff in_reg_lo is 0
2734     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2735 
2736     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2737     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2738     // we check that the output is in a low register, so that a 16-bit MOV
2739     // encoding can be used. If output is in a high register, then we generate
2740     // 4 more bytes of code to avoid a branch.
2741     Operand mov_src(0);
2742     if (!out_reg_lo.IsLow()) {
2743       __ Mov(LeaveFlags, temp, 0);
2744       mov_src = Operand(temp);
2745     }
2746     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2747                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2748                                   CodeBufferCheckScope::kExactSize);
2749     __ it(ne);
2750     __ mov(ne, out_reg_hi, mov_src);
2751   } else {
2752     vixl32::Register out = OutputRegister(invoke);
2753     vixl32::Register in = InputRegisterAt(invoke, 0);
2754 
2755     __ Rsb(temp, in, 0);
2756     __ And(out, temp, in);
2757   }
2758 }
2759 
2760 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2761   CreateIntToIntLocations(allocator_, invoke);
2762 }
2763 
2764 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2765   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2766 }
2767 
2768 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2769   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2770 }
2771 
2772 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2773   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2774 }
2775 
2776 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2777   LocationSummary* locations =
2778       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2779   locations->SetInAt(0, Location::RequiresRegister());
2780   locations->SetInAt(1, Location::RequiresRegister());
2781   locations->SetInAt(2, Location::RequiresRegister());
2782   locations->SetInAt(3, Location::RequiresRegister());
2783   locations->SetInAt(4, Location::RequiresRegister());
2784 
2785   // Temporary registers to store lengths of strings and for calculations.
2786   locations->AddTemp(Location::RequiresRegister());
2787   locations->AddTemp(Location::RequiresRegister());
2788   locations->AddTemp(Location::RequiresRegister());
2789 }
2790 
2791 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2792   ArmVIXLAssembler* assembler = GetAssembler();
2793   LocationSummary* locations = invoke->GetLocations();
2794 
2795   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2796   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2797   DCHECK_EQ(char_size, 2u);
2798 
2799   // Location of data in char array buffer.
2800   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2801 
2802   // Location of char array data in string.
2803   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2804 
2805   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2806   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
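  // The copy runs in up to three stages: a word-copy loop handling 4 characters per
  // iteration, a halfword remainder loop, and (with string compression enabled) a
  // byte-to-halfword loop for compressed source strings.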
2807   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2808   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2809   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2810   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2811   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2812 
2813   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2814   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2815   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2816 
2817   vixl32::Label done, compressed_string_loop;
2818   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2819   // dst to be copied.
2820   __ Add(dst_ptr, dstObj, data_offset);
2821   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2822 
2823   __ Subs(num_chr, srcEnd, srcBegin);
2824   // Early out for valid zero-length retrievals.
2825   __ B(eq, final_label, /* is_far_target= */ false);
2826 
2827   // src range to copy.
2828   __ Add(src_ptr, srcObj, value_offset);
2829 
2830   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2831   vixl32::Register temp;
2832   vixl32::Label compressed_string_preloop;
2833   if (mirror::kUseStringCompression) {
2834     // Location of count in string.
2835     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2836     temp = temps.Acquire();
2837     // String's length.
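    // (Bit 0 of the count field is the compression flag; 0 means the string is
    // compressed to 8-bit characters.)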
2838     __ Ldr(temp, MemOperand(srcObj, count_offset));
2839     __ Tst(temp, 1);
2840     temps.Release(temp);
2841     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2842   }
2843   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2844 
2845   // Do the copy.
2846   vixl32::Label loop, remainder;
2847 
2848   temp = temps.Acquire();
2849   // Subtract into a temp so that num_chr does not need repairing on the < 4 character path.
2850   __ Subs(temp, num_chr, 4);
2851   __ B(lt, &remainder, /* is_far_target= */ false);
2852 
2853   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2854   __ Mov(num_chr, temp);
2855 
2856   // The main loop, used for longer lengths, loads and stores 4x16-bit characters at a time.
2857   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2858   // to rectify these everywhere this intrinsic applies.)
2859   __ Bind(&loop);
2860   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2861   __ Subs(num_chr, num_chr, 4);
2862   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2863   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2864   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2865   temps.Release(temp);
2866   __ B(ge, &loop, /* is_far_target= */ false);
2867 
2868   __ Adds(num_chr, num_chr, 4);
2869   __ B(eq, final_label, /* is_far_target= */ false);
2870 
2871   // Loop for the < 4 character case and remainder handling. Loads and stores one
2872   // 16-bit Java character at a time.
2873   __ Bind(&remainder);
2874   temp = temps.Acquire();
2875   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2876   __ Subs(num_chr, num_chr, 1);
2877   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2878   temps.Release(temp);
2879   __ B(gt, &remainder, /* is_far_target= */ false);
2880 
2881   if (mirror::kUseStringCompression) {
2882     __ B(final_label);
2883 
2884     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2885     DCHECK_EQ(c_char_size, 1u);
2886     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2887     __ Bind(&compressed_string_preloop);
2888     __ Add(src_ptr, src_ptr, srcBegin);
2889     __ Bind(&compressed_string_loop);
2890     temp = temps.Acquire();
2891     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2892     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2893     temps.Release(temp);
2894     __ Subs(num_chr, num_chr, 1);
2895     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2896   }
2897 
2898   if (done.IsReferenced()) {
2899     __ Bind(&done);
2900   }
2901 }
2902 
2903 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2904   CreateFPToIntLocations(allocator_, invoke);
2905 }
2906 
2907 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2908   ArmVIXLAssembler* const assembler = GetAssembler();
2909   const vixl32::Register out = OutputRegister(invoke);
2910   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2911   // we don't care about the sign bit anyway.
2912   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
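  // The check below is effectively (float_bits << 1) ^ (infinity << 1): the result is
  // zero iff the input is +/- infinity, and GenerateConditionWithZero turns that into
  // the boolean result.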
2913 
2914   __ Vmov(out, InputSRegisterAt(invoke, 0));
2915   // We don't care about the sign bit, so shift left.
2916   __ Lsl(out, out, 1);
2917   __ Eor(out, out, infinity);
2918   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2919 }
2920 
2921 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2922   CreateFPToIntLocations(allocator_, invoke);
2923 }
2924 
2925 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2926   ArmVIXLAssembler* const assembler = GetAssembler();
2927   const vixl32::Register out = OutputRegister(invoke);
2928   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2929   const vixl32::Register temp = temps.Acquire();
2930   // The highest 32 bits of double precision positive infinity separated into
2931   // two constants encodable as immediate operands.
2932   constexpr uint32_t infinity_high  = 0x7f000000U;
2933   constexpr uint32_t infinity_high2 = 0x00f00000U;
2934 
2935   static_assert((infinity_high | infinity_high2) ==
2936                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2937                 "The constants do not add up to the high 32 bits of double "
2938                 "precision positive infinity.");
2939   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2940   __ Eor(out, out, infinity_high);
2941   __ Eor(out, out, infinity_high2);
2942   // We don't care about the sign bit, so shift left.
2943   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2944   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2945 }
2946 
2947 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2948   if (features_.HasARMv8AInstructions()) {
2949     CreateFPToFPLocations(allocator_, invoke);
2950   }
2951 }
2952 
2953 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2954   ArmVIXLAssembler* assembler = GetAssembler();
2955   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2956   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2957 }
2958 
2959 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2960   if (features_.HasARMv8AInstructions()) {
2961     CreateFPToFPLocations(allocator_, invoke);
2962   }
2963 }
2964 
2965 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2966   ArmVIXLAssembler* assembler = GetAssembler();
2967   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2968   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2969 }
2970 
2971 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2972   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2973   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2974       invoke,
2975       codegen_,
2976       LocationFrom(r0),
2977       LocationFrom(calling_convention.GetRegisterAt(0)));
2978 }
2979 
2980 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2981   IntrinsicVisitor::IntegerValueOfInfo info =
2982       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2983   LocationSummary* locations = invoke->GetLocations();
2984   ArmVIXLAssembler* const assembler = GetAssembler();
2985 
2986   vixl32::Register out = RegisterFrom(locations->Out());
2987   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2988   vixl32::Register temp = temps.Acquire();
2989   if (invoke->InputAt(0)->IsConstant()) {
2990     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2991     if (static_cast<uint32_t>(value - info.low) < info.length) {
2992       // Just embed the j.l.Integer in the code.
2993       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2994       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2995     } else {
2996       DCHECK(locations->CanCall());
2997       // Allocate and initialize a new j.l.Integer.
2998       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2999       // JIT object table.
3000       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3001                                              info.integer_boot_image_offset);
3002       __ Mov(temp, value);
3003       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3004       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3005       // one.
3006       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3007     }
3008   } else {
3009     DCHECK(locations->CanCall());
3010     vixl32::Register in = RegisterFrom(locations->InAt(0));
3011     // Check bounds of our cache.
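    // (value - info.low) is compared unsigned against the cache length, so the single
    // `hs` branch below covers both value < low and value >= low + length.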
3012     __ Add(out, in, -info.low);
3013     __ Cmp(out, info.length);
3014     vixl32::Label allocate, done;
3015     __ B(hs, &allocate, /* is_far_target= */ false);
3016     // If the value is within the bounds, load the j.l.Integer directly from the array.
3017     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3018     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
3019     assembler->MaybeUnpoisonHeapReference(out);
3020     __ B(&done);
3021     __ Bind(&allocate);
3022     // Otherwise allocate and initialize a new j.l.Integer.
3023     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3024                                            info.integer_boot_image_offset);
3025     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3026     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3027     // one.
3028     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3029     __ Bind(&done);
3030   }
3031 }
3032 
3033 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3034   LocationSummary* locations =
3035       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3036   locations->SetOut(Location::RequiresRegister());
3037 }
3038 
3039 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3040   ArmVIXLAssembler* assembler = GetAssembler();
3041   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
3042   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
3043   __ Ldr(out, MemOperand(tr, offset));
3044   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3045   vixl32::Register temp = temps.Acquire();
3046   vixl32::Label done;
3047   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
3048   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
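  // The interrupted flag was set: clear it, with memory barriers on both sides so the
  // clear is not reordered with the surrounding accesses.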
3049   __ Dmb(vixl32::ISH);
3050   __ Mov(temp, 0);
3051   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3052   __ Dmb(vixl32::ISH);
3053   if (done.IsReferenced()) {
3054     __ Bind(&done);
3055   }
3056 }
3057 
3058 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
3059   LocationSummary* locations =
3060       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3061   locations->SetInAt(0, Location::Any());
3062 }
3063 
3064 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3065 
3066 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
3067 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
3068 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3069 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3070 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
3071 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
3072 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
3073 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
3074 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
3075 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
3076 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
3077 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
3078 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
3079 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
3080 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
3081 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
3082 
3083 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3084 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3085 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3086 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3087 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3088 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
3089 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
3090 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
3091 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
3092 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
3093 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
3094 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
3095 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
3096 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
3097 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
3098 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3099 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3100 
3101 // 1.8.
3102 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3103 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3104 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3105 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3106 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3107 
3108 UNREACHABLE_INTRINSICS(ARMVIXL)
3109 
3110 #undef __
3111 
3112 }  // namespace arm
3113 }  // namespace art
3114