/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/arm/assembler_arm.h"

namespace art {

namespace arm {

ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;

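// The `__` shorthand below routes assembler mnemonics through a local `assembler`
// pointer (and, further down in this file, through the codegen's assembler) so
// the emission code stays concise.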
#define __ assembler->

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmAssembler* assembler,
                                          Primitive::Type type,
                                          const Register& array,
                                          const Location& pos,
                                          const Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

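  // base = array + data_offset + pos * element_size, where `pos` is either a
  // constant or a register holding the start index.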
  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ AddConstant(base, array, element_size * constant + data_offset);
  } else {
    __ add(base, array, ShifterOperand(pos.AsRegister<Register>(), LSL, element_size_shift));
    __ AddConstant(base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmAssembler* assembler,
                                         Primitive::Type type,
                                         const Location& copy_length,
                                         const Register& base,
                                         const Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);

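  // end = base + copy_length * element_size, with `copy_length` either a
  // constant or a register.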
  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ AddConstant(end, base, element_size * constant);
  } else {
    __ add(end, base, ShifterOperand(copy_length.AsRegister<Register>(), LSL, element_size_shift));
  }
}

#undef __

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
    ArmAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    Primitive::Type type = Primitive::kPrimNot;
    const int32_t element_size = Primitive::ComponentSize(type);

    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
    Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
    Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
    Register tmp = locations->GetTemp(3).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

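    // Copy loop: load each reference from the source, mark it via the read
    // barrier entrypoint, and store it to the destination until the source
    // cursor reaches `src_stop_addr`.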
    Label loop;
    __ Bind(&loop);
    __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
    __ MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp, SP);
    DCHECK_NE(tmp, LR);
    DCHECK_NE(tmp, PC);
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(src_curr_addr, IP);
    DCHECK_NE(dst_curr_addr, IP);
    DCHECK_NE(src_stop_addr, IP);
    DCHECK_NE(tmp, IP);
    DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(tmp);
    __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
    __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
    __ b(&loop, NE);
    __ b(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
};

#undef __

IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ vmovrrd(output.AsRegisterPairLow<Register>(),
               output.AsRegisterPairHigh<Register>(),
               FromLowSToD(input.AsFpuRegisterPairLow<SRegister>()));
  } else {
    __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()),
               input.AsRegisterPairLow<Register>(),
               input.AsRegisterPairHigh<Register>());
  } else {
    __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    Primitive::Type type,
                                    CodeGeneratorARM* codegen) {
  ArmAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

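  // For longs, count leading zeros in the high word first; if the high word
  // is zero, the result is 32 plus the leading zeros of the low word.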
  if (type == Primitive::kPrimLong) {
    Register in_reg_lo = in.AsRegisterPairLow<Register>();
    Register in_reg_hi = in.AsRegisterPairHigh<Register>();
    Label end;
    Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
    __ clz(out, in_reg_lo);
    __ AddConstant(out, 32);
    if (end.IsLinked()) {
      __ Bind(&end);
    }
  } else {
    __ clz(out, in.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}

void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     Primitive::Type type,
                                     CodeGeneratorARM* codegen) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  ArmAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Register out = locations->Out().AsRegister<Register>();

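  // ARM has no count-trailing-zeros instruction, so reverse the bits with
  // rbit and then count leading zeros with clz.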
  if (type == Primitive::kPrimLong) {
    Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
    Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
    Label end;
    Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ rbit(out, in_reg_lo);
    __ clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
    __ rbit(out, in_reg_hi);
    __ clz(out, out);
    __ AddConstant(out, 32);
    if (end.IsLinked()) {
      __ Bind(&end);
    }
  } else {
    Register in = locations->InAt(0).AsRegister<Register>();
    __ rbit(out, in);
    __ clz(out, out);
  }
}

void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}

void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  if (is64bit) {
    __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
             FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
  } else {
    __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>());
  }
}

void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  Register mask = locations->GetTemp(0).AsRegister<Register>();

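  // Branch-free abs: mask = value >> 31 (0 for non-negative, -1 for negative),
  // then abs = (value + mask) ^ mask. For example, for -5: mask = -1 and
  // (-5 + -1) ^ -1 = 5.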
  if (is64bit) {
    Register in_reg_lo = in.AsRegisterPairLow<Register>();
    Register in_reg_hi = in.AsRegisterPairHigh<Register>();
    Register out_reg_lo = output.AsRegisterPairLow<Register>();
    Register out_reg_hi = output.AsRegisterPairHigh<Register>();

    DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask));
    __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask));
    __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo));
    __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi));
  } else {
    Register in_reg = in.AsRegister<Register>();
    Register out_reg = output.AsRegister<Register>();

    __ Asr(mask, in_reg, 31);
    __ add(out_reg, in_reg, ShifterOperand(mask));
    __ eor(out_reg, mask, ShifterOperand(out_reg));
  }
}

void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMax(LocationSummary* locations,
                      bool is_min,
                      ArmAssembler* assembler) {
  Register op1 = locations->InAt(0).AsRegister<Register>();
  Register op2 = locations->InAt(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

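  // Compare the two operands, then use an IT (if-then-else) block to move
  // either op1 or op2 into `out` based on the signed comparison result.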
  __ cmp(op1, ShifterOperand(op2));

  __ it((is_min) ? Condition::LT : Condition::GT, kItElse);
  __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT);
  __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  ArmAssembler* assembler = GetAssembler();
  __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
            FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(),
           Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(),
         Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
  Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
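  // If the address register aliases the low output register, load the high
  // word first so the address is not clobbered before the second load.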
  if (addr == lo) {
    __ ldr(hi, Address(addr, 4));
    __ ldr(lo, Address(addr, 0));
  } else {
    __ ldr(lo, Address(addr, 0));
    __ ldr(hi, Address(addr, 4));
  }
}

void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(),
           Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
          Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
         Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0));
  __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4));
}

void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
          Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
}

void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  __ LoadFromOffset(kLoadWord,
                    invoke->GetLocations()->Out().AsRegister<Register>(),
                    TR,
                    Thread::PeerOffset<kArmPointerSize>().Int32Value());
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARM* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  Register base = base_loc.AsRegister<Register>();             // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      Register trg = trg_loc.AsRegister<Register>();
      __ ldr(trg, Address(base, offset));
      if (is_volatile) {
        __ dmb(ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      Register trg = trg_loc.AsRegister<Register>();
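      // Object field load. With Baker read barriers the load and the mark
      // check are emitted together; with other read barriers the value is
      // loaded and then handled by the slow-path read barrier; otherwise a
      // plain load (plus unpoisoning) suffices.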
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ dmb(ISH);
          }
        } else {
          __ ldr(trg, Address(base, offset));
          if (is_volatile) {
            __ dmb(ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ ldr(trg, Address(base, offset));
        if (is_volatile) {
          __ dmb(ISH);
        }
        __ MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
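      // A volatile 64-bit load must be single-copy atomic; when the CPU lacks
      // atomic ldrd/strd, fall back to ldrexd.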
      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
      __ add(IP, base, ShifterOperand(offset));
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
        __ ldrexd(trg_lo, trg_hi, IP);
      } else {
        __ ldrd(trg_lo, Address(IP));
      }
      if (is_volatile) {
        __ dmb(ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM* codegen) {
  ArmAssembler* assembler = codegen->GetAssembler();

  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Long offset, lo part only.
  Register value;

  if (is_volatile || is_ordered) {
    __ dmb(ISH);
  }

  if (type == Primitive::kPrimLong) {
    Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      Register temp_lo = locations->GetTemp(0).AsRegister<Register>();
      Register temp_hi = locations->GetTemp(1).AsRegister<Register>();
      Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();

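      // Use a ldrexd/strexd loop so the 64-bit store is single-copy atomic.
      // The values loaded by ldrexd are ignored; the instruction only sets up
      // the exclusive monitor that strexd checks.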
      __ add(IP, base, ShifterOperand(offset));
      Label loop_head;
      __ Bind(&loop_head);
      __ ldrexd(temp_lo, temp_hi, IP);
      __ strexd(temp_lo, value_lo, value_hi, IP);
      __ cmp(temp_lo, ShifterOperand(0));
      __ b(&loop_head, NE);
    } else {
      __ add(IP, base, ShifterOperand(offset));
      __ strd(value_lo, Address(IP));
    }
  } else {
    value = locations->InAt(3).AsRegister<Register>();
    Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      Register temp = locations->GetTemp(0).AsRegister<Register>();
      __ Mov(temp, value);
      __ PoisonHeapReference(temp);
      source = temp;
    }
    __ str(source, Address(base, offset));
  }

  if (is_volatile) {
    __ dmb(ISH);
  }

  if (type == Primitive::kPrimNot) {
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    Register card = locations->GetTemp(1).AsRegister<Register>();
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
      ? Location::kOutputOverlap
      : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();                  // Boolean result.

  Register base = locations->InAt(1).AsRegister<Register>();      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = offset_loc.AsRegisterPairLow<Register>();     // Offset (discard high 4B).
  Register expected = locations->InAt(3).AsRegister<Register>();  // Expected.
  Register value = locations->InAt(4).AsRegister<Register>();     // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>();          // Pointer to actual memory.
  Register tmp = locations->GetTemp(1).AsRegister<Register>();    // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ dmb(ISH);

  __ add(tmp_ptr, base, ShifterOperand(offset));

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ PoisonHeapReference(expected);
    if (value == expected) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      __ PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp == 0;

  Label loop_head;
  __ Bind(&loop_head);

  __ ldrex(tmp, tmp_ptr);

  __ subs(tmp, tmp, ShifterOperand(expected));

  __ it(EQ, ItState::kItT);
  __ strex(tmp, value, tmp_ptr, EQ);
  __ cmp(tmp, ShifterOperand(1), EQ);

  __ b(&loop_head, EQ);

  __ dmb(ISH);

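  // out = (tmp == 0): rsbs computes 1 - tmp and sets the flags; the
  // conditional mov clears out when the subtraction borrowed (i.e. tmp > 1).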
  __ rsbs(out, tmp, ShifterOperand(1));
  __ it(CC);
  __ mov(out, ShifterOperand(0), CC);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(expected);
    if (value == expected) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      __ UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus the temporaries needed for the comparison loops.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need an extra temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
1121 
VisitStringCompareTo(HInvoke * invoke)1122 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
1123   ArmAssembler* assembler = GetAssembler();
1124   LocationSummary* locations = invoke->GetLocations();
1125 
1126   Register str = locations->InAt(0).AsRegister<Register>();
1127   Register arg = locations->InAt(1).AsRegister<Register>();
1128   Register out = locations->Out().AsRegister<Register>();
1129 
1130   Register temp0 = locations->GetTemp(0).AsRegister<Register>();
1131   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
1132   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
1133   Register temp3;
1134   if (mirror::kUseStringCompression) {
1135     temp3 = locations->GetTemp(3).AsRegister<Register>();
1136   }
1137 
1138   Label loop;
1139   Label find_char_diff;
1140   Label end;
1141   Label different_compression;
1142 
1143   // Get offsets of count and value fields within a string object.
1144   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1145   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1146 
1147   // Note that the null check must have been done earlier.
1148   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1149 
1150   // Take the slow path and throw if the input can be null and actually is null.
1151   SlowPathCode* slow_path = nullptr;
1152   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1153   if (can_slow_path) {
1154     slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1155     codegen_->AddSlowPath(slow_path);
1156     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1157   }
1158 
1159   // Reference equality check, return 0 if same reference.
1160   __ subs(out, str, ShifterOperand(arg));
1161   __ b(&end, EQ);
1162 
1163   if (mirror::kUseStringCompression) {
1164     // Load `count` fields of this and argument strings.
1165     __ ldr(temp3, Address(str, count_offset));
1166     __ ldr(temp2, Address(arg, count_offset));
1167     // Extract lengths from the `count` fields.
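    // With string compression, the `count` field holds (length << 1) | compression_flag, where
    // flag 0 means compressed (8-bit) and 1 means uncompressed (16-bit) character data, so a
    // logical shift right by one recovers the character count.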
1168     __ Lsr(temp0, temp3, 1u);
1169     __ Lsr(temp1, temp2, 1u);
1170   } else {
1171     // Load lengths of this and argument strings.
1172     __ ldr(temp0, Address(str, count_offset));
1173     __ ldr(temp1, Address(arg, count_offset));
1174   }
1175   // out = length diff.
1176   __ subs(out, temp0, ShifterOperand(temp1));
1177   // temp0 = min(len(str), len(arg)).
1178   __ it(GT);
1179   __ mov(temp0, ShifterOperand(temp1), GT);
1180   // Shorter string is empty?
1181   __ CompareAndBranchIfZero(temp0, &end);
1182 
1183   if (mirror::kUseStringCompression) {
1184     // Check that both strings use the same compression style; only then can this loop be used.
1185     __ eor(temp2, temp2, ShifterOperand(temp3));
1186     __ Lsrs(temp2, temp2, 1u);
1187     __ b(&different_compression, CS);
1188     // For string compression, calculate the number of bytes to compare (not chars).
1189     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1190     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
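    // LSLS #31 moves the compression flag (bit 0) into the sign bit and sets the condition
    // flags: Z is set when the flag is 0 (compressed), so the IT NE below doubles the
    // character count into a byte count only for uncompressed strings.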
1191     __ it(NE);
1192     __ add(temp0, temp0, ShifterOperand(temp0), NE);
1193   }
1194 
1195   // Store offset of string value in preparation for comparison loop.
1196   __ mov(temp1, ShifterOperand(value_offset));
1197 
1198   // Assertions that must hold in order to compare multiple characters at a time.
1199   CHECK_ALIGNED(value_offset, 8);
1200   static_assert(IsAligned<8>(kObjectAlignment),
1201                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1202 
1203   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1204   DCHECK_EQ(char_size, 2u);
1205 
1206   Label find_char_diff_2nd_cmp;
1207   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1208   __ Bind(&loop);
1209   __ ldr(IP, Address(str, temp1));
1210   __ ldr(temp2, Address(arg, temp1));
1211   __ cmp(IP, ShifterOperand(temp2));
1212   __ b(&find_char_diff, NE);
1213   __ add(temp1, temp1, ShifterOperand(char_size * 2));
1214 
1215   __ ldr(IP, Address(str, temp1));
1216   __ ldr(temp2, Address(arg, temp1));
1217   __ cmp(IP, ShifterOperand(temp2));
1218   __ b(&find_char_diff_2nd_cmp, NE);
1219   __ add(temp1, temp1, ShifterOperand(char_size * 2));
1220   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1221   __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
1222   __ b(&loop, HI);
1223   __ b(&end);
1224 
1225   __ Bind(&find_char_diff_2nd_cmp);
1226   if (mirror::kUseStringCompression) {
1227     __ subs(temp0, temp0, ShifterOperand(4));  // 4 bytes previously compared.
1228     __ b(&end, LS);  // Was the second comparison fully beyond the end?
1229   } else {
1230     // Without string compression, we can start treating temp0 as signed
1231     // and rely on the signed comparison below.
1232     __ sub(temp0, temp0, ShifterOperand(2));
1233   }
1234 
1235   // Find the single character difference.
1236   __ Bind(&find_char_diff);
1237   // Get the bit position of the first character that differs.
1238   __ eor(temp1, temp2, ShifterOperand(IP));
1239   __ rbit(temp1, temp1);
1240   __ clz(temp1, temp1);
1241 
1242   // temp0 = number of characters remaining to compare.
1243   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1244   // in the comparison loop, past the end of the shorter string's data).
1245 
1246   // Without string compression, (temp1 >> 4) = character where the difference occurs between
1247   // the last two words compared, in the interval [0,1]
1248   // (0 for low half-word different, 1 for high half-word different).
1249   // With string compression, (temp1 >> 3) = byte where the difference occurs,
1250   // in the interval [0,3].
1251 
1252   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1253   // the remaining string data, so just return length diff (out).
1254   // The comparison is unsigned for string compression, otherwise signed.
1255   __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
1256   __ b(&end, mirror::kUseStringCompression ? LS : LE);
1257 
1258   // Extract the characters and calculate the difference.
1259   if (mirror::kUseStringCompression) {
1260     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1261     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1262     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1263     __ orr(temp3, temp3, ShifterOperand(0xffu << 23));  // uncompressed ? 0xff800000u : 0x7ff80000u
1264     __ bic(temp1, temp1, ShifterOperand(temp3, LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1265     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
1266     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
1267     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
1268     __ Lsr(out, IP, temp1);                             // Extract first character.
1269     __ and_(temp2, temp2, ShifterOperand(temp3));
1270     __ and_(out, out, ShifterOperand(temp3));
1271   } else {
1272     __ bic(temp1, temp1, ShifterOperand(0xf));
1273     __ Lsr(temp2, temp2, temp1);
1274     __ Lsr(out, IP, temp1);
1275     __ movt(temp2, 0);
1276     __ movt(out, 0);
1277   }
1278 
1279   __ sub(out, out, ShifterOperand(temp2));
1280 
1281   if (mirror::kUseStringCompression) {
1282     __ b(&end);
1283     __ Bind(&different_compression);
1284 
1285     // Comparison for different compression style.
1286     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1287     DCHECK_EQ(c_char_size, 1u);
1288 
1289     // We want to free up temp3, currently holding `str.count`, for comparison.
1290     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1291     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1292     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1293     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1294     __ add(temp0, temp0, ShifterOperand(temp0));  // Unlike LSL, this ADD is always 16-bit.
1295     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1296     __ mov(temp1, ShifterOperand(str));
1297     __ mov(temp2, ShifterOperand(arg));
1298     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
1299     __ it(CS, kItThen);                       // Interleave with selection of temp1 and temp2.
1300     __ mov(temp1, ShifterOperand(arg), CS);   // Preserves flags.
1301     __ mov(temp2, ShifterOperand(str), CS);   // Preserves flags.
1302     __ sbc(temp0, temp0, ShifterOperand(0));  // Complete the move of the compression flag.
1303 
1304     // Adjust temp1 and temp2 from string pointers to data pointers.
1305     __ add(temp1, temp1, ShifterOperand(value_offset));
1306     __ add(temp2, temp2, ShifterOperand(value_offset));
1307 
1308     Label different_compression_loop;
1309     Label different_compression_diff;
1310 
1311     // Main loop for different compression.
1312     __ Bind(&different_compression_loop);
1313     __ ldrb(IP, Address(temp1, c_char_size, Address::PostIndex));
1314     __ ldrh(temp3, Address(temp2, char_size, Address::PostIndex));
1315     __ cmp(IP, ShifterOperand(temp3));
1316     __ b(&different_compression_diff, NE);
1317     __ subs(temp0, temp0, ShifterOperand(2));
1318     __ b(&different_compression_loop, HI);
1319     __ b(&end);
1320 
1321     // Calculate the difference.
1322     __ Bind(&different_compression_diff);
1323     __ sub(out, IP, ShifterOperand(temp3));
1324     // Flip the difference if the `arg` is compressed.
1325     // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1326     __ Lsrs(temp0, temp0, 1u);
1327     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1328                   "Expecting 0=compressed, 1=uncompressed");
1329     __ it(CC);
1330     __ rsb(out, out, ShifterOperand(0), CC);
1331   }
1332 
1333   __ Bind(&end);
1334 
1335   if (can_slow_path) {
1336     __ Bind(slow_path->GetExitLabel());
1337   }
1338 }
1339 
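// The StringEquals intrinsic below inlines String.equals. As a sketch (assuming the standard
// java.lang.String.equals contract), the generated code is equivalent to:
//
//   boolean equals(Object arg) {
//     if (this == arg) return true;
//     if (arg == null || arg.getClass() != String.class) return false;
//     String other = (String) arg;
//     if (this.count != other.count) return false;  // Length and compression flag must match.
//     return dataEquals(other);  // Word-by-word comparison of the character data (loop below).
//   }
//
// where `dataEquals` is a hypothetical name standing in for the comparison loop emitted below.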
1340 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
1341   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1342                                                             LocationSummary::kNoCall,
1343                                                             kIntrinsified);
1344   InvokeRuntimeCallingConvention calling_convention;
1345   locations->SetInAt(0, Location::RequiresRegister());
1346   locations->SetInAt(1, Location::RequiresRegister());
1347   // Temporary registers to store lengths of strings and for calculations.
1348   // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1349   locations->AddTemp(Location::RegisterLocation(R0));
1350   locations->AddTemp(Location::RequiresRegister());
1351   locations->AddTemp(Location::RequiresRegister());
1352 
1353   locations->SetOut(Location::RequiresRegister());
1354 }
1355 
1356 void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
1357   ArmAssembler* assembler = GetAssembler();
1358   LocationSummary* locations = invoke->GetLocations();
1359 
1360   Register str = locations->InAt(0).AsRegister<Register>();
1361   Register arg = locations->InAt(1).AsRegister<Register>();
1362   Register out = locations->Out().AsRegister<Register>();
1363 
1364   Register temp = locations->GetTemp(0).AsRegister<Register>();
1365   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
1366   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
1367 
1368   Label loop;
1369   Label end;
1370   Label return_true;
1371   Label return_false;
1372   Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1373 
1374   // Get offsets of count, value, and class fields within a string object.
1375   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1376   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1377   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1378 
1379   // Note that the null check must have been done earlier.
1380   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1381 
1382   StringEqualsOptimizations optimizations(invoke);
1383   if (!optimizations.GetArgumentNotNull()) {
1384     // Check if input is null, return false if it is.
1385     __ CompareAndBranchIfZero(arg, &return_false);
1386   }
1387 
1388   // Reference equality check, return true if same reference.
1389   __ cmp(str, ShifterOperand(arg));
1390   __ b(&return_true, EQ);
1391 
1392   if (!optimizations.GetArgumentIsString()) {
1393     // Instanceof check for the argument by comparing class fields.
1394     // All string objects must have the same type since String cannot be subclassed.
1395     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1396     // If the argument is a string object, its class field must be equal to receiver's class field.
1397     __ ldr(temp, Address(str, class_offset));
1398     __ ldr(temp1, Address(arg, class_offset));
1399     __ cmp(temp, ShifterOperand(temp1));
1400     __ b(&return_false, NE);
1401   }
1402 
1403   // Load `count` fields of this and argument strings.
1404   __ ldr(temp, Address(str, count_offset));
1405   __ ldr(temp1, Address(arg, count_offset));
1406   // Check if `count` fields are equal, return false if they're not.
1407   // This also compares the compression style; if it differs, return false.
1408   __ cmp(temp, ShifterOperand(temp1));
1409   __ b(&return_false, NE);
1410   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1411   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1412                 "Expecting 0=compressed, 1=uncompressed");
1413   __ cbz(temp, &return_true);
1414 
1415   // Assertions that must hold in order to compare strings 4 bytes at a time.
1416   DCHECK_ALIGNED(value_offset, 4);
1417   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1418 
1419   if (mirror::kUseStringCompression) {
1420     // For string compression, calculate the number of bytes to compare (not chars).
1421     // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1422     __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1423     __ it(CS);                                      // If uncompressed,
1424     __ add(temp, temp, ShifterOperand(temp), CS);   //   double the byte count.
1425   }
1426 
1427   // Store offset of string value in preparation for comparison loop.
1428   __ LoadImmediate(temp1, value_offset);
1429 
1430   // Loop to compare strings 4 bytes at a time starting at the front of the string.
1431   // Ok to do this because strings are zero-padded to kObjectAlignment.
1432   __ Bind(&loop);
1433   __ ldr(out, Address(str, temp1));
1434   __ ldr(temp2, Address(arg, temp1));
1435   __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
1436   __ cmp(out, ShifterOperand(temp2));
1437   __ b(&return_false, NE);
1438   // With string compression, we have compared 4 bytes, otherwise 2 chars.
1439   __ subs(temp, temp, ShifterOperand(mirror::kUseStringCompression ? 4 : 2));
1440   __ b(&loop, HI);
1441 
1442   // Return true and exit the function.
1443   // If the loop above did not branch to return_false, we fall through and return true here.
1444   __ Bind(&return_true);
1445   __ LoadImmediate(out, 1);
1446   __ b(final_label);
1447 
1448   // Return false and exit the function.
1449   __ Bind(&return_false);
1450   __ LoadImmediate(out, 0);
1451 
1452   if (end.IsLinked()) {
1453     __ Bind(&end);
1454   }
1455 }
1456 
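// String.indexOf is not fully inlined: the code below only performs the code point range check
// (and zeroes the start index when requested) inline, then calls the kQuickIndexOf runtime
// entry point, which is backed by a hand-crafted assembly stub.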
1457 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1458                                        ArmAssembler* assembler,
1459                                        CodeGeneratorARM* codegen,
1460                                        ArenaAllocator* allocator,
1461                                        bool start_at_zero) {
1462   LocationSummary* locations = invoke->GetLocations();
1463 
1464   // Note that the null check must have been done earlier.
1465   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1466 
1467   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1468   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1469   SlowPathCode* slow_path = nullptr;
1470   HInstruction* code_point = invoke->InputAt(1);
1471   if (code_point->IsIntConstant()) {
1472     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1473         std::numeric_limits<uint16_t>::max()) {
1474       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1475       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1476       slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
1477       codegen->AddSlowPath(slow_path);
1478       __ b(slow_path->GetEntryLabel());
1479       __ Bind(slow_path->GetExitLabel());
1480       return;
1481     }
1482   } else if (code_point->GetType() != Primitive::kPrimChar) {
1483     Register char_reg = locations->InAt(1).AsRegister<Register>();
1484     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1485     __ cmp(char_reg,
1486            ShifterOperand(static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1));
1487     slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
1488     codegen->AddSlowPath(slow_path);
1489     __ b(slow_path->GetEntryLabel(), HS);
1490   }
1491 
1492   if (start_at_zero) {
1493     Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
1494     DCHECK_EQ(tmp_reg, R2);
1495     // Start-index = 0.
1496     __ LoadImmediate(tmp_reg, 0);
1497   }
1498 
1499   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1500   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1501 
1502   if (slow_path != nullptr) {
1503     __ Bind(slow_path->GetExitLabel());
1504   }
1505 }
1506 
1507 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
1508   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1509                                                             LocationSummary::kCallOnMainAndSlowPath,
1510                                                             kIntrinsified);
1511   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1512   // best to align the inputs accordingly.
1513   InvokeRuntimeCallingConvention calling_convention;
1514   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1515   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1516   locations->SetOut(Location::RegisterLocation(R0));
1517 
1518   // Need to send start-index=0.
1519   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1520 }
1521 
1522 void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
1523   GenerateVisitStringIndexOf(
1524       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1525 }
1526 
1527 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
1528   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1529                                                             LocationSummary::kCallOnMainAndSlowPath,
1530                                                             kIntrinsified);
1531   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1532   // best to align the inputs accordingly.
1533   InvokeRuntimeCallingConvention calling_convention;
1534   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1535   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1536   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1537   locations->SetOut(Location::RegisterLocation(R0));
1538 }
1539 
1540 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
1541   GenerateVisitStringIndexOf(
1542       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1543 }
1544 
1545 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
1546   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1547                                                             LocationSummary::kCallOnMainAndSlowPath,
1548                                                             kIntrinsified);
1549   InvokeRuntimeCallingConvention calling_convention;
1550   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1551   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1552   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1553   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1554   locations->SetOut(Location::RegisterLocation(R0));
1555 }
1556 
1557 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
1558   ArmAssembler* assembler = GetAssembler();
1559   LocationSummary* locations = invoke->GetLocations();
1560 
1561   Register byte_array = locations->InAt(0).AsRegister<Register>();
1562   __ cmp(byte_array, ShifterOperand(0));
1563   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1564   codegen_->AddSlowPath(slow_path);
1565   __ b(slow_path->GetEntryLabel(), EQ);
1566 
1567   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1568   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1569   __ Bind(slow_path->GetExitLabel());
1570 }
1571 
1572 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
1573   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1574                                                             LocationSummary::kCallOnMainOnly,
1575                                                             kIntrinsified);
1576   InvokeRuntimeCallingConvention calling_convention;
1577   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1578   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1579   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1580   locations->SetOut(Location::RegisterLocation(R0));
1581 }
1582 
1583 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
1584   // No need to emit code checking whether `locations->InAt(2)` is a null
1585   // pointer, as callers of the native method
1586   //
1587   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1588   //
1589   // all include a null check on `data` before calling that method.
1590   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1591   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1592 }
1593 
1594 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
1595   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1596                                                             LocationSummary::kCallOnMainAndSlowPath,
1597                                                             kIntrinsified);
1598   InvokeRuntimeCallingConvention calling_convention;
1599   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1600   locations->SetOut(Location::RegisterLocation(R0));
1601 }
1602 
1603 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) {
1604   ArmAssembler* assembler = GetAssembler();
1605   LocationSummary* locations = invoke->GetLocations();
1606 
1607   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1608   __ cmp(string_to_copy, ShifterOperand(0));
1609   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1610   codegen_->AddSlowPath(slow_path);
1611   __ b(slow_path->GetEntryLabel(), EQ);
1612 
1613   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1614   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1615 
1616   __ Bind(slow_path->GetExitLabel());
1617 }
1618 
1619 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
1620   // The only read barrier implementation supporting the
1621   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1622   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1623     return;
1624   }
1625 
1626   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1627   LocationSummary* locations = invoke->GetLocations();
1628   if (locations == nullptr) {
1629     return;
1630   }
1631 
1632   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1633   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1634   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1635 
1636   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1637     locations->SetInAt(1, Location::RequiresRegister());
1638   }
1639   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1640     locations->SetInAt(3, Location::RequiresRegister());
1641   }
1642   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1643     locations->SetInAt(4, Location::RequiresRegister());
1644   }
1645   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1646     // Temporary register IP cannot be used in
1647     // ReadBarrierSystemArrayCopySlowPathARM (because that register
1648     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1649     // temporary register from the register allocator.
1650     locations->AddTemp(Location::RequiresRegister());
1651   }
1652 }
1653 
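// Emits the bounds checks for one side of a System.arraycopy call: verifies that `pos` lies
// within `input` and that `input.length - pos >= length`, branching to `slow_path` on failure.
// When `length_is_input_length` is true, the length argument is known to be the input's own
// length, so only `pos == 0` needs to be verified.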
1654 static void CheckPosition(ArmAssembler* assembler,
1655                           Location pos,
1656                           Register input,
1657                           Location length,
1658                           SlowPathCode* slow_path,
1659                           Register temp,
1660                           bool length_is_input_length = false) {
1661   // Where is the length in the Array?
1662   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1663 
1664   if (pos.IsConstant()) {
1665     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1666     if (pos_const == 0) {
1667       if (!length_is_input_length) {
1668         // Check that length(input) >= length.
1669         __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1670         if (length.IsConstant()) {
1671           __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1672         } else {
1673           __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1674         }
1675         __ b(slow_path->GetEntryLabel(), LT);
1676       }
1677     } else {
1678       // Check that length(input) >= pos.
1679       __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1680       __ subs(temp, temp, ShifterOperand(pos_const));
1681       __ b(slow_path->GetEntryLabel(), LT);
1682 
1683       // Check that (length(input) - pos) >= length.
1684       if (length.IsConstant()) {
1685         __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1686       } else {
1687         __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1688       }
1689       __ b(slow_path->GetEntryLabel(), LT);
1690     }
1691   } else if (length_is_input_length) {
1692     // The only way the copy can succeed is if pos is zero.
1693     Register pos_reg = pos.AsRegister<Register>();
1694     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1695   } else {
1696     // Check that pos >= 0.
1697     Register pos_reg = pos.AsRegister<Register>();
1698     __ cmp(pos_reg, ShifterOperand(0));
1699     __ b(slow_path->GetEntryLabel(), LT);
1700 
1701     // Check that pos <= length(input).
1702     __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1703     __ subs(temp, temp, ShifterOperand(pos_reg));
1704     __ b(slow_path->GetEntryLabel(), LT);
1705 
1706     // Check that (length(input) - pos) >= length.
1707     if (length.IsConstant()) {
1708       __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1709     } else {
1710       __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1711     }
1712     __ b(slow_path->GetEntryLabel(), LT);
1713   }
1714 }
1715 
1716 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
1717   // The only read barrier implementation supporting the
1718   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1719   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1720 
1721   ArmAssembler* assembler = GetAssembler();
1722   LocationSummary* locations = invoke->GetLocations();
1723 
1724   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1725   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1726   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1727   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1728   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1729 
1730   Register src = locations->InAt(0).AsRegister<Register>();
1731   Location src_pos = locations->InAt(1);
1732   Register dest = locations->InAt(2).AsRegister<Register>();
1733   Location dest_pos = locations->InAt(3);
1734   Location length = locations->InAt(4);
1735   Location temp1_loc = locations->GetTemp(0);
1736   Register temp1 = temp1_loc.AsRegister<Register>();
1737   Location temp2_loc = locations->GetTemp(1);
1738   Register temp2 = temp2_loc.AsRegister<Register>();
1739   Location temp3_loc = locations->GetTemp(2);
1740   Register temp3 = temp3_loc.AsRegister<Register>();
1741 
1742   SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1743   codegen_->AddSlowPath(intrinsic_slow_path);
1744 
1745   Label conditions_on_positions_validated;
1746   SystemArrayCopyOptimizations optimizations(invoke);
1747 
1748   // If source and destination are the same array, we go to the slow path if the copy
1749   // regions could overlap such that the forward copy below would overwrite data not yet copied.
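  // In pseudocode (a sketch): if (src == dest && src_pos < dest_pos) goto intrinsic_slow_path;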
1750   if (src_pos.IsConstant()) {
1751     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1752     if (dest_pos.IsConstant()) {
1753       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1754       if (optimizations.GetDestinationIsSource()) {
1755         // Checked when building locations.
1756         DCHECK_GE(src_pos_constant, dest_pos_constant);
1757       } else if (src_pos_constant < dest_pos_constant) {
1758         __ cmp(src, ShifterOperand(dest));
1759         __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
1760       }
1761 
1762       // Checked when building locations.
1763       DCHECK(!optimizations.GetDestinationIsSource()
1764              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
1765     } else {
1766       if (!optimizations.GetDestinationIsSource()) {
1767         __ cmp(src, ShifterOperand(dest));
1768         __ b(&conditions_on_positions_validated, NE);
1769       }
1770       __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
1771       __ b(intrinsic_slow_path->GetEntryLabel(), GT);
1772     }
1773   } else {
1774     if (!optimizations.GetDestinationIsSource()) {
1775       __ cmp(src, ShifterOperand(dest));
1776       __ b(&conditions_on_positions_validated, NE);
1777     }
1778     if (dest_pos.IsConstant()) {
1779       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1780       __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos_constant));
1781     } else {
1782       __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
1783     }
1784     __ b(intrinsic_slow_path->GetEntryLabel(), LT);
1785   }
1786 
1787   __ Bind(&conditions_on_positions_validated);
1788 
1789   if (!optimizations.GetSourceIsNotNull()) {
1790     // Bail out if the source is null.
1791     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1792   }
1793 
1794   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1795     // Bail out if the destination is null.
1796     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1797   }
1798 
1799   // If the length is negative, bail out.
1800   // We have already checked in the LocationsBuilder for the constant case.
1801   if (!length.IsConstant() &&
1802       !optimizations.GetCountIsSourceLength() &&
1803       !optimizations.GetCountIsDestinationLength()) {
1804     __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
1805     __ b(intrinsic_slow_path->GetEntryLabel(), LT);
1806   }
1807 
1808   // Validity checks: source.
1809   CheckPosition(assembler,
1810                 src_pos,
1811                 src,
1812                 length,
1813                 intrinsic_slow_path,
1814                 temp1,
1815                 optimizations.GetCountIsSourceLength());
1816 
1817   // Validity checks: dest.
1818   CheckPosition(assembler,
1819                 dest_pos,
1820                 dest,
1821                 length,
1822                 intrinsic_slow_path,
1823                 temp1,
1824                 optimizations.GetCountIsDestinationLength());
1825 
1826   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1827     // Check whether all elements of the source array are assignable to the component
1828     // type of the destination array. We do two checks: the classes are the same,
1829     // or the destination is Object[]. If neither check succeeds, we go to the
1830     // slow path.
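    // In pseudocode, the core check emitted below is roughly (a sketch; the surrounding code
    // also verifies that both classes describe non-primitive arrays):
    //
    //   if (src->klass_ != dest->klass_ &&
    //       dest->klass_->component_type_->super_class_ != null) {  // dest is not Object[].
    //     goto intrinsic_slow_path;  // Let the runtime perform the per-element checks.
    //   }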
1831 
1832     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1833       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1834         // /* HeapReference<Class> */ temp1 = src->klass_
1835         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1836             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1837         // Bail out if the source is not a non-primitive array.
1838         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1839         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1840             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1841         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1842         // If heap poisoning is enabled, `temp1` has been unpoisoned
1843         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1844         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1845         __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
1846         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1847         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1848       }
1849 
1850       // /* HeapReference<Class> */ temp1 = dest->klass_
1851       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1852           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1853 
1854       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1855         // Bail out if the destination is not a non-primitive array.
1856         //
1857         // Register `temp1` is not trashed by the read barrier emitted
1858         // by GenerateFieldLoadWithBakerReadBarrier below, as that
1859         // method produces a call to a ReadBarrierMarkRegX entry point,
1860         // which saves all potentially live registers, including
1861         // temporaries such as `temp1`.
1862         // /* HeapReference<Class> */ temp2 = temp1->component_type_
1863         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1864             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
1865         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
1866         // If heap poisoning is enabled, `temp2` has been unpoisoned
1867         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1868         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1869         __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
1870         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1871         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
1872       }
1873 
1874       // For the same reason given earlier, `temp1` is not trashed by the
1875       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1876       // /* HeapReference<Class> */ temp2 = src->klass_
1877       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1878           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1879       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1880       __ cmp(temp1, ShifterOperand(temp2));
1881 
1882       if (optimizations.GetDestinationIsTypedObjectArray()) {
1883         Label do_copy;
1884         __ b(&do_copy, EQ);
1885         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1886         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1887             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1888         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1889         // We do not need to emit a read barrier for the following
1890         // heap reference load, as `temp1` is only used in a
1891         // comparison with null below, and this reference is not
1892         // kept afterwards.
1893         __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
1894         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1895         __ Bind(&do_copy);
1896       } else {
1897         __ b(intrinsic_slow_path->GetEntryLabel(), NE);
1898       }
1899     } else {
1900       // Non read barrier code.
1901 
1902       // /* HeapReference<Class> */ temp1 = dest->klass_
1903       __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
1904       // /* HeapReference<Class> */ temp2 = src->klass_
1905       __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
1906       bool did_unpoison = false;
1907       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1908           !optimizations.GetSourceIsNonPrimitiveArray()) {
1909         // One or two of the references need to be unpoisoned. Unpoison them
1910         // both to make the identity check valid.
1911         __ MaybeUnpoisonHeapReference(temp1);
1912         __ MaybeUnpoisonHeapReference(temp2);
1913         did_unpoison = true;
1914       }
1915 
1916       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1917         // Bail out if the destination is not a non-primitive array.
1918         // /* HeapReference<Class> */ temp3 = temp1->component_type_
1919         __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
1920         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1921         __ MaybeUnpoisonHeapReference(temp3);
1922         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1923         __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1924         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1925         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1926       }
1927 
1928       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1929         // Bail out if the source is not a non-primitive array.
1930         // /* HeapReference<Class> */ temp3 = temp2->component_type_
1931         __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
1932         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1933         __ MaybeUnpoisonHeapReference(temp3);
1934         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1935         __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1936         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1937         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1938       }
1939 
1940       __ cmp(temp1, ShifterOperand(temp2));
1941 
1942       if (optimizations.GetDestinationIsTypedObjectArray()) {
1943         Label do_copy;
1944         __ b(&do_copy, EQ);
1945         if (!did_unpoison) {
1946           __ MaybeUnpoisonHeapReference(temp1);
1947         }
1948         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1949         __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
1950         __ MaybeUnpoisonHeapReference(temp1);
1951         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1952         __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
1953         // No need to unpoison the result, we're comparing against null.
1954         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1955         __ Bind(&do_copy);
1956       } else {
1957         __ b(intrinsic_slow_path->GetEntryLabel(), NE);
1958       }
1959     }
1960   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1961     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1962     // Bail out if the source is not a non-primitive array.
1963     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1964       // /* HeapReference<Class> */ temp1 = src->klass_
1965       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1966           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1967       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1968       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1969           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1970       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1971       // If heap poisoning is enabled, `temp3` has been unpoisoned
1972       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1973     } else {
1974       // /* HeapReference<Class> */ temp1 = src->klass_
1975       __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
1976       __ MaybeUnpoisonHeapReference(temp1);
1977       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1978       __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
1979       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1980       __ MaybeUnpoisonHeapReference(temp3);
1981     }
1982     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1983     __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1984     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1985     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1986   }
1987 
1988   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1989     // Zero constant length: no need to emit the loop code at all.
1990   } else {
1991     Label done;
1992     const Primitive::Type type = Primitive::kPrimNot;
1993     const int32_t element_size = Primitive::ComponentSize(type);
1994 
1995     if (length.IsRegister()) {
1996       // Don't enter the copy loop if the length is 0.
1997       __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done);
1998     }
1999 
2000     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2001       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2002 
2003       // SystemArrayCopy implementation for Baker read barriers (see
2004       // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2005       //
2006       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2007       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2008       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2009       //   if (is_gray) {
2010       //     // Slow-path copy.
2011       //     do {
2012       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2013       //     } while (src_ptr != end_ptr)
2014       //   } else {
2015       //     // Fast-path copy.
2016       //     do {
2017       //       *dest_ptr++ = *src_ptr++;
2018       //     } while (src_ptr != end_ptr)
2019       //   }
2020 
2021       // /* int32_t */ monitor = src->monitor_
2022       __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
2023       // /* LockWord */ lock_word = LockWord(monitor)
2024       static_assert(sizeof(LockWord) == sizeof(int32_t),
2025                     "art::LockWord and int32_t have different sizes.");
2026 
2027       // Introduce a dependency on the lock_word including the rb_state,
2028       // which shall prevent load-load reordering without using
2029       // a memory barrier (which would be more expensive).
2030       // `src` is unchanged by this operation, but its value now depends
2031       // on `temp2`.
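      // On ARM, an immediate shift of LSR #32 produces 0, so this ADD leaves the value of `src`
      // unchanged while still creating a register dependency on `temp2`.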
2032       __ add(src, src, ShifterOperand(temp2, LSR, 32));
2033 
2034       // Compute the base source address in `temp1`.
2035       // Note that `temp1` (the base source address) is computed from
2036       // `src` (and `src_pos`) here, and thus honors the artificial
2037       // dependency of `src` on `temp2`.
2038       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2039       // Compute the end source address in `temp3`.
2040       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2041       // The base destination address is computed later, as `temp2` is
2042       // used for intermediate computations.
2043 
2044       // Slow path used to copy array when `src` is gray.
2045       // Note that the base destination address is computed in `temp2`
2046       // by the slow path code.
2047       SlowPathCode* read_barrier_slow_path =
2048           new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
2049       codegen_->AddSlowPath(read_barrier_slow_path);
2050 
2051       // Given the numeric representation, it's enough to check the low bit of the
2052       // rb_state. We do that by shifting the bit out of the lock word with LSRS
2053       // which can be a 16-bit instruction unlike the TST immediate.
2054       static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2055       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2056       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2057       // Carry flag is the last bit shifted out by LSRS.
2058       __ b(read_barrier_slow_path->GetEntryLabel(), CS);
2059 
2060       // Fast-path copy.
2061       // Compute the base destination address in `temp2`.
2062       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2063       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2064       // poison/unpoison.
2065       Label loop;
2066       __ Bind(&loop);
2067       __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
2068       __ str(IP, Address(temp2, element_size, Address::PostIndex));
2069       __ cmp(temp1, ShifterOperand(temp3));
2070       __ b(&loop, NE);
2071 
2072       __ Bind(read_barrier_slow_path->GetExitLabel());
2073     } else {
2074       // Non read barrier code.
2075       // Compute the base source address in `temp1`.
2076       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2077       // Compute the base destination address in `temp2`.
2078       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2079       // Compute the end source address in `temp3`.
2080       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2081       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2082       // poison/unpoison.
2083       Label loop;
2084       __ Bind(&loop);
2085       __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
2086       __ str(IP, Address(temp2, element_size, Address::PostIndex));
2087       __ cmp(temp1, ShifterOperand(temp3));
2088       __ b(&loop, NE);
2089     }
2090     __ Bind(&done);
2091   }
2092 
2093   // We only need one card marking on the destination array.
2094   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
2095 
2096   __ Bind(intrinsic_slow_path->GetExitLabel());
2097 }
2098 
2099 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2100   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2101   // the code generator. Furthermore, the register allocator creates fixed live intervals
2102   // for all caller-saved registers because we are doing a function call. As a result, if
2103   // the input and output locations are unallocated, the register allocator runs out of
2104   // registers and fails; however, a debuggable graph is not the common case.
2105   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2106     return;
2107   }
2108 
2109   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2110   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2111   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2112 
2113   LocationSummary* const locations = new (arena) LocationSummary(invoke,
2114                                                                  LocationSummary::kCallOnMainOnly,
2115                                                                  kIntrinsified);
2116   const InvokeRuntimeCallingConvention calling_convention;
2117 
2118   locations->SetInAt(0, Location::RequiresFpuRegister());
2119   locations->SetOut(Location::RequiresFpuRegister());
2120   // Native code uses the soft float ABI.
2121   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2122   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
2123 }
2124 
2125 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2126   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2127   // the code generator. Furthermore, the register allocator creates fixed live intervals
2128   // for all caller-saved registers because we are doing a function call. As a result, if
2129   // the input and output locations are unallocated, the register allocator runs out of
2130   // registers and fails; however, a debuggable graph is not the common case.
2131   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2132     return;
2133   }
2134 
2135   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2136   DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2137   DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2138   DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2139 
2140   LocationSummary* const locations = new (arena) LocationSummary(invoke,
2141                                                                  LocationSummary::kCallOnMainOnly,
2142                                                                  kIntrinsified);
2143   const InvokeRuntimeCallingConvention calling_convention;
2144 
2145   locations->SetInAt(0, Location::RequiresFpuRegister());
2146   locations->SetInAt(1, Location::RequiresFpuRegister());
2147   locations->SetOut(Location::RequiresFpuRegister());
2148   // Native code uses the soft float ABI.
2149   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2150   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
2151   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
2152   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
2153 }
2154 
2155 static void GenFPToFPCall(HInvoke* invoke,
2156                           ArmAssembler* assembler,
2157                           CodeGeneratorARM* codegen,
2158                           QuickEntrypointEnum entry) {
2159   LocationSummary* const locations = invoke->GetLocations();
2160   const InvokeRuntimeCallingConvention calling_convention;
2161 
2162   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2163   DCHECK(locations->WillCall() && locations->Intrinsified());
2164   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
2165   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
2166 
2167   // Native code uses the soft float ABI.
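  // The double argument is moved from its D register into the first two core argument registers
  // for the call, and the double result is moved back from them into the output D register.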
2168   __ vmovrrd(calling_convention.GetRegisterAt(0),
2169              calling_convention.GetRegisterAt(1),
2170              FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
2171   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2172   __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
2173              calling_convention.GetRegisterAt(0),
2174              calling_convention.GetRegisterAt(1));
2175 }
2176 
2177 static void GenFPFPToFPCall(HInvoke* invoke,
2178                           ArmAssembler* assembler,
2179                           CodeGeneratorARM* codegen,
2180                           QuickEntrypointEnum entry) {
2181   LocationSummary* const locations = invoke->GetLocations();
2182   const InvokeRuntimeCallingConvention calling_convention;
2183 
2184   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2185   DCHECK(locations->WillCall() && locations->Intrinsified());
2186   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
2187   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
2188   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
2189   DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
2190 
2191   // Native code uses the soft float ABI.
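  // Both double arguments are moved from their D registers into the four core argument
  // registers for the call, and the double result is moved back into the output D register.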
2192   __ vmovrrd(calling_convention.GetRegisterAt(0),
2193              calling_convention.GetRegisterAt(1),
2194              FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
2195   __ vmovrrd(calling_convention.GetRegisterAt(2),
2196              calling_convention.GetRegisterAt(3),
2197              FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()));
2198   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2199   __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
2200              calling_convention.GetRegisterAt(0),
2201              calling_convention.GetRegisterAt(1));
2202 }
2203 
2204 void IntrinsicLocationsBuilderARM::VisitMathCos(HInvoke* invoke) {
2205   CreateFPToFPCallLocations(arena_, invoke);
2206 }
2207 
2208 void IntrinsicCodeGeneratorARM::VisitMathCos(HInvoke* invoke) {
2209   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2210 }
2211 
2212 void IntrinsicLocationsBuilderARM::VisitMathSin(HInvoke* invoke) {
2213   CreateFPToFPCallLocations(arena_, invoke);
2214 }
2215 
2216 void IntrinsicCodeGeneratorARM::VisitMathSin(HInvoke* invoke) {
2217   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2218 }
2219 
2220 void IntrinsicLocationsBuilderARM::VisitMathAcos(HInvoke* invoke) {
2221   CreateFPToFPCallLocations(arena_, invoke);
2222 }
2223 
2224 void IntrinsicCodeGeneratorARM::VisitMathAcos(HInvoke* invoke) {
2225   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2226 }
2227 
2228 void IntrinsicLocationsBuilderARM::VisitMathAsin(HInvoke* invoke) {
2229   CreateFPToFPCallLocations(arena_, invoke);
2230 }
2231 
2232 void IntrinsicCodeGeneratorARM::VisitMathAsin(HInvoke* invoke) {
2233   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2234 }
2235 
2236 void IntrinsicLocationsBuilderARM::VisitMathAtan(HInvoke* invoke) {
2237   CreateFPToFPCallLocations(arena_, invoke);
2238 }
2239 
2240 void IntrinsicCodeGeneratorARM::VisitMathAtan(HInvoke* invoke) {
2241   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2242 }
2243 
2244 void IntrinsicLocationsBuilderARM::VisitMathCbrt(HInvoke* invoke) {
2245   CreateFPToFPCallLocations(arena_, invoke);
2246 }
2247 
2248 void IntrinsicCodeGeneratorARM::VisitMathCbrt(HInvoke* invoke) {
2249   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2250 }
2251 
2252 void IntrinsicLocationsBuilderARM::VisitMathCosh(HInvoke* invoke) {
2253   CreateFPToFPCallLocations(arena_, invoke);
2254 }
2255 
2256 void IntrinsicCodeGeneratorARM::VisitMathCosh(HInvoke* invoke) {
2257   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2258 }
2259 
2260 void IntrinsicLocationsBuilderARM::VisitMathExp(HInvoke* invoke) {
2261   CreateFPToFPCallLocations(arena_, invoke);
2262 }
2263 
2264 void IntrinsicCodeGeneratorARM::VisitMathExp(HInvoke* invoke) {
2265   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2266 }
2267 
2268 void IntrinsicLocationsBuilderARM::VisitMathExpm1(HInvoke* invoke) {
2269   CreateFPToFPCallLocations(arena_, invoke);
2270 }
2271 
2272 void IntrinsicCodeGeneratorARM::VisitMathExpm1(HInvoke* invoke) {
2273   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2274 }
2275 
2276 void IntrinsicLocationsBuilderARM::VisitMathLog(HInvoke* invoke) {
2277   CreateFPToFPCallLocations(arena_, invoke);
2278 }
2279 
2280 void IntrinsicCodeGeneratorARM::VisitMathLog(HInvoke* invoke) {
2281   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2282 }
2283 
2284 void IntrinsicLocationsBuilderARM::VisitMathLog10(HInvoke* invoke) {
2285   CreateFPToFPCallLocations(arena_, invoke);
2286 }
2287 
2288 void IntrinsicCodeGeneratorARM::VisitMathLog10(HInvoke* invoke) {
2289   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2290 }
2291 
2292 void IntrinsicLocationsBuilderARM::VisitMathSinh(HInvoke* invoke) {
2293   CreateFPToFPCallLocations(arena_, invoke);
2294 }
2295 
2296 void IntrinsicCodeGeneratorARM::VisitMathSinh(HInvoke* invoke) {
2297   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2298 }
2299 
2300 void IntrinsicLocationsBuilderARM::VisitMathTan(HInvoke* invoke) {
2301   CreateFPToFPCallLocations(arena_, invoke);
2302 }
2303 
2304 void IntrinsicCodeGeneratorARM::VisitMathTan(HInvoke* invoke) {
2305   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2306 }
2307 
2308 void IntrinsicLocationsBuilderARM::VisitMathTanh(HInvoke* invoke) {
2309   CreateFPToFPCallLocations(arena_, invoke);
2310 }
2311 
2312 void IntrinsicCodeGeneratorARM::VisitMathTanh(HInvoke* invoke) {
2313   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2314 }
2315 
2316 void IntrinsicLocationsBuilderARM::VisitMathAtan2(HInvoke* invoke) {
2317   CreateFPFPToFPCallLocations(arena_, invoke);
2318 }
2319 
2320 void IntrinsicCodeGeneratorARM::VisitMathAtan2(HInvoke* invoke) {
2321   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2322 }
2323 
2324 void IntrinsicLocationsBuilderARM::VisitMathHypot(HInvoke* invoke) {
2325   CreateFPFPToFPCallLocations(arena_, invoke);
2326 }
2327 
2328 void IntrinsicCodeGeneratorARM::VisitMathHypot(HInvoke* invoke) {
2329   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2330 }
2331 
2332 void IntrinsicLocationsBuilderARM::VisitMathNextAfter(HInvoke* invoke) {
2333   CreateFPFPToFPCallLocations(arena_, invoke);
2334 }
2335 
2336 void IntrinsicCodeGeneratorARM::VisitMathNextAfter(HInvoke* invoke) {
2337   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2338 }
2339 
2340 void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) {
2341   CreateIntToIntLocations(arena_, invoke);
2342 }
2343 
2344 void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) {
2345   ArmAssembler* assembler = GetAssembler();
2346   LocationSummary* locations = invoke->GetLocations();
2347 
2348   Register out = locations->Out().AsRegister<Register>();
2349   Register in  = locations->InAt(0).AsRegister<Register>();
2350 
2351   __ rbit(out, in);
2352 }
2353 
2354 void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) {
2355   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2356                                                             LocationSummary::kNoCall,
2357                                                             kIntrinsified);
2358   locations->SetInAt(0, Location::RequiresRegister());
2359   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2360 }
2361 
2362 void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) {
2363   ArmAssembler* assembler = GetAssembler();
2364   LocationSummary* locations = invoke->GetLocations();
2365 
2366   Register in_reg_lo  = locations->InAt(0).AsRegisterPairLow<Register>();
2367   Register in_reg_hi  = locations->InAt(0).AsRegisterPairHigh<Register>();
2368   Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
2369   Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
2370 
2371   __ rbit(out_reg_lo, in_reg_hi);
2372   __ rbit(out_reg_hi, in_reg_lo);
2373 }
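// A sketch of the half-swapping above: Long.reverse() of a 64-bit value is RBIT applied to each
// 32-bit half, with the halves exchanged. E.g. for input 0x0000000000000001 (in_hi = 0x00000000,
// in_lo = 0x00000001), out_lo = rbit(in_hi) = 0x00000000 and out_hi = rbit(in_lo) = 0x80000000,
// i.e. 0x8000000000000000 as expected. The kOutputOverlap marking in the locations above
// presumably keeps the output pair in registers distinct from the input pair, since out_reg_lo is
// written before in_reg_lo is read.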
2374 
2375 void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) {
2376   CreateIntToIntLocations(arena_, invoke);
2377 }
2378 
2379 void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) {
2380   ArmAssembler* assembler = GetAssembler();
2381   LocationSummary* locations = invoke->GetLocations();
2382 
2383   Register out = locations->Out().AsRegister<Register>();
2384   Register in  = locations->InAt(0).AsRegister<Register>();
2385 
2386   __ rev(out, in);
2387 }
2388 
2389 void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) {
2390   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2391                                                             LocationSummary::kNoCall,
2392                                                             kIntrinsified);
2393   locations->SetInAt(0, Location::RequiresRegister());
2394   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2395 }
2396 
2397 void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) {
2398   ArmAssembler* assembler = GetAssembler();
2399   LocationSummary* locations = invoke->GetLocations();
2400 
2401   Register in_reg_lo  = locations->InAt(0).AsRegisterPairLow<Register>();
2402   Register in_reg_hi  = locations->InAt(0).AsRegisterPairHigh<Register>();
2403   Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
2404   Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
2405 
2406   __ rev(out_reg_lo, in_reg_hi);
2407   __ rev(out_reg_hi, in_reg_lo);
2408 }
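// A sketch of the same half-swapping idea with REV: for input 0x0102030405060708
// (in_hi = 0x01020304, in_lo = 0x05060708), out_lo = rev(in_hi) = 0x04030201 and
// out_hi = rev(in_lo) = 0x08070605, giving 0x0807060504030201, the byte-reversed value.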
2409 
2410 void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) {
2411   CreateIntToIntLocations(arena_, invoke);
2412 }
2413 
2414 void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
2415   ArmAssembler* assembler = GetAssembler();
2416   LocationSummary* locations = invoke->GetLocations();
2417 
2418   Register out = locations->Out().AsRegister<Register>();
2419   Register in  = locations->InAt(0).AsRegister<Register>();
2420 
2421   __ revsh(out, in);
2422 }
2423 
2424 static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
2425   DCHECK(Primitive::IsIntOrLongType(type)) << type;
2426   DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2427   DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2428 
2429   bool is_long = type == Primitive::kPrimLong;
2430   LocationSummary* locations = instr->GetLocations();
2431   Location in = locations->InAt(0);
2432   Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
2433   Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
2434   SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
2435   DRegister tmp_d = FromLowSToD(tmp_s);
2436   Register  out_r = locations->Out().AsRegister<Register>();
2437 
2438   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2439   // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
2440   // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency;
2441   // that's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2442   __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
2443   __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
2444   __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
2445   __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
2446   if (is_long) {
2447     __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
2448   }
2449   __ vmovrs(out_r, tmp_s);
2450 }
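// A worked example of the reduction above for the int case, assuming input r0 = 0x80000001
// (two bits set); the lane values shown are illustrative:
//
//   vmov       d0, r0, r0    @ D reg = |0x80000001|0x80000001|
//   vcnt.8     d0, d0        @ per-byte counts: |1|0|0|1|1|0|0|1|
//   vpaddl.u8  d0, d0        @ 16-bit lanes:    |--1|--1|--1|--1|
//   vpaddl.u16 d0, d0        @ 32-bit lanes:    |------2|------2|
//   vmov       r0, s0        @ low 32-bit lane = 2 = Integer.bitCount(0x80000001)
//
// For the long case, one more vpaddl.u32 folds both halves into a single 64-bit count.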
2451 
2452 void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
2453   CreateIntToIntLocations(arena_, invoke);
2454   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2455 }
2456 
2457 void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
2458   GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2459 }
2460 
2461 void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
2462   VisitIntegerBitCount(invoke);
2463 }
2464 
2465 void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
2466   GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2467 }
2468 
2469 void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2470   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2471                                                             LocationSummary::kNoCall,
2472                                                             kIntrinsified);
2473   locations->SetInAt(0, Location::RequiresRegister());
2474   locations->SetInAt(1, Location::RequiresRegister());
2475   locations->SetInAt(2, Location::RequiresRegister());
2476   locations->SetInAt(3, Location::RequiresRegister());
2477   locations->SetInAt(4, Location::RequiresRegister());
2478 
2479   // Temporary registers to store lengths of strings and for calculations.
2480   locations->AddTemp(Location::RequiresRegister());
2481   locations->AddTemp(Location::RequiresRegister());
2482   locations->AddTemp(Location::RequiresRegister());
2483 }
2484 
2485 void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2486   ArmAssembler* assembler = GetAssembler();
2487   LocationSummary* locations = invoke->GetLocations();
2488 
2489   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2490   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2491   DCHECK_EQ(char_size, 2u);
2492 
2493   // Location of data in char array buffer.
2494   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2495 
2496   // Location of char array data in string.
2497   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2498 
2499   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2500   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2501   Register srcObj = locations->InAt(0).AsRegister<Register>();
2502   Register srcBegin = locations->InAt(1).AsRegister<Register>();
2503   Register srcEnd = locations->InAt(2).AsRegister<Register>();
2504   Register dstObj = locations->InAt(3).AsRegister<Register>();
2505   Register dstBegin = locations->InAt(4).AsRegister<Register>();
2506 
2507   Register num_chr = locations->GetTemp(0).AsRegister<Register>();
2508   Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
2509   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
2510 
2511   Label done, compressed_string_loop;
2512   Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2513   // Compute the starting address of the dst region to be copied into.
2514   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
2515   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
2516 
2517   __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
2518   // Early out for valid zero-length retrievals.
2519   __ b(final_label, EQ);
2520 
2521   // Compute the starting address of the src range to copy.
2522   __ add(src_ptr, srcObj, ShifterOperand(value_offset));
2523   Label compressed_string_preloop;
2524   if (mirror::kUseStringCompression) {
2525     // Location of count in string.
2526     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2527     // Load the string's count field: the length with the compression flag in the least
2527     // significant bit (the bit is clear for compressed, 8-bit strings).
2528     __ ldr(IP, Address(srcObj, count_offset));
2529     __ tst(IP, ShifterOperand(1));
2530     __ b(&compressed_string_preloop, EQ);
2531   }
2532   __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
2533 
2534   // Do the copy.
2535   Label loop, remainder;
2536 
2537   // Subtract into IP so that num_chr does not need repairing on the < 4 character path.
2538   __ subs(IP, num_chr, ShifterOperand(4));
2539   __ b(&remainder, LT);
2540 
2541   // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
2542   __ mov(num_chr, ShifterOperand(IP));
2543 
2544   // The main loop, used for longer copies, loads and stores 4x16-bit characters at a time.
2545   // (LDRD/STRD fault on unaligned addresses, and it's not worth inlining extra code
2546   // to rectify that everywhere this intrinsic applies.)
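// Each iteration below moves 8 bytes: the word at offset 4 is copied first, then the word at
// offset 0 is copied with post-indexed addressing, which also advances src_ptr and dst_ptr by
// 8 bytes; num_chr is decremented by 4 between the first load and store.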
2547   __ Bind(&loop);
2548   __ ldr(IP, Address(src_ptr, char_size * 2));
2549   __ subs(num_chr, num_chr, ShifterOperand(4));
2550   __ str(IP, Address(dst_ptr, char_size * 2));
2551   __ ldr(IP, Address(src_ptr, char_size * 4, Address::PostIndex));
2552   __ str(IP, Address(dst_ptr, char_size * 4, Address::PostIndex));
2553   __ b(&loop, GE);
2554 
2555   __ adds(num_chr, num_chr, ShifterOperand(4));
2556   __ b(final_label, EQ);
2557 
2558   // Loop for the < 4 character case and remainder handling: loads and stores one
2559   // 16-bit Java character at a time.
2560   __ Bind(&remainder);
2561   __ ldrh(IP, Address(src_ptr, char_size, Address::PostIndex));
2562   __ subs(num_chr, num_chr, ShifterOperand(1));
2563   __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
2564   __ b(&remainder, GT);
2565 
2566   if (mirror::kUseStringCompression) {
2567     __ b(final_label);
2568 
2569     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2570     DCHECK_EQ(c_char_size, 1u);
2571     // Copy loop for compressed src, expanding each 8-bit character to 16 bits, one at a time.
2572     __ Bind(&compressed_string_preloop);
2573     __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
2574     __ Bind(&compressed_string_loop);
2575     __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
2576     __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
2577     __ subs(num_chr, num_chr, ShifterOperand(1));
2578     __ b(&compressed_string_loop, GT);
2579   }
2580 
2581   if (done.IsLinked()) {
2582     __ Bind(&done);
2583   }
2584 }
2585 
2586 void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
2587   CreateFPToIntLocations(arena_, invoke);
2588 }
2589 
2590 void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
2591   ArmAssembler* const assembler = GetAssembler();
2592   LocationSummary* const locations = invoke->GetLocations();
2593   const Register out = locations->Out().AsRegister<Register>();
2594   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2595   // we don't care about the sign bit anyway.
2596   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2597 
2598   __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
2599   // We don't care about the sign bit, so shift left.
2600   __ Lsl(out, out, 1);
2601   __ eor(out, out, ShifterOperand(infinity));
2602   // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
2603   __ clz(out, out);
2604   // Any number less than 32 logically shifted right by 5 bits results in 0;
2605   // the same operation on 32 yields 1.
2606   __ Lsr(out, out, 5);
2607 }
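// A worked example of the bit trick above: for +Inf (0x7f800000) the left shift gives 0xff000000,
// which XORed with `infinity` (0x7f800000 << 1 == 0xff000000) yields 0, so CLZ returns 32 and the
// final shift produces 1. -Inf (0xff800000) loses its sign bit in the shift and takes the same
// path. Any other input leaves a non-zero value after the XOR, so CLZ returns at most 31 and the
// result is 0.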
2608 
2609 void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
2610   CreateFPToIntLocations(arena_, invoke);
2611 }
2612 
2613 void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
2614   ArmAssembler* const assembler = GetAssembler();
2615   LocationSummary* const locations = invoke->GetLocations();
2616   const Register out = locations->Out().AsRegister<Register>();
2617   // The highest 32 bits of double precision positive infinity separated into
2618   // two constants encodable as immediate operands.
2619   constexpr uint32_t infinity_high  = 0x7f000000U;
2620   constexpr uint32_t infinity_high2 = 0x00f00000U;
2621 
2622   static_assert((infinity_high | infinity_high2) == static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2623                 "The constants do not add up to the high 32 bits of double precision positive infinity.");
2624   __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
2625   __ eor(out, out, ShifterOperand(infinity_high));
2626   __ eor(out, out, ShifterOperand(infinity_high2));
2627   // We don't care about the sign bit, so shift left.
2628   __ orr(out, IP, ShifterOperand(out, LSL, 1));
2629   // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
2630   __ clz(out, out);
2631   // Any number less than 32 logically shifted right by 5 bits results in 0;
2632   // the same operation on 32 yields 1.
2633   __ Lsr(out, out, 5);
2634 }
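// The same trick as the float case: the value is +/-infinity iff the low word (in IP) is zero and
// the high word, with its sign bit discarded by the left shift, equals 0x7ff00000 (split into the
// two encodable constants above). ORing the two checks leaves zero only for infinities, which the
// CLZ and the final shift turn into 1.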
2635 
2636 void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
2637   if (kEmitCompilerReadBarrier) {
2638     // Do not intrinsify this call with the read barrier configuration.
2639     return;
2640   }
2641   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2642                                                             LocationSummary::kCallOnSlowPath,
2643                                                             kIntrinsified);
2644   locations->SetInAt(0, Location::RequiresRegister());
2645   locations->SetOut(Location::SameAsFirstInput());
2646   locations->AddTemp(Location::RequiresRegister());
2647 }
2648 
2649 void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
2650   DCHECK(!kEmitCompilerReadBarrier);
2651   ArmAssembler* const assembler = GetAssembler();
2652   LocationSummary* locations = invoke->GetLocations();
2653 
2654   Register obj = locations->InAt(0).AsRegister<Register>();
2655   Register out = locations->Out().AsRegister<Register>();
2656 
2657   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
2658   codegen_->AddSlowPath(slow_path);
2659 
2660   // Load ArtMethod first.
2661   HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2662   DCHECK(invoke_direct != nullptr);
2663   Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
2664       invoke_direct, locations->GetTemp(0)).AsRegister<Register>();
2665 
2666   // Now get declaring class.
2667   __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
2668 
2669   uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2670   uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2671   DCHECK_NE(slow_path_flag_offset, 0u);
2672   DCHECK_NE(disable_flag_offset, 0u);
2673   DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2674 
2675   // Check the static flags that prevent using the intrinsic.
2676   __ ldr(IP, Address(temp, disable_flag_offset));
2677   __ ldr(temp, Address(temp, slow_path_flag_offset));
2678   __ orr(IP, IP, ShifterOperand(temp));
2679   __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
2680 
2681   // Fast path.
2682   __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
2683   codegen_->MaybeRecordImplicitNullCheck(invoke);
2684   __ MaybeUnpoisonHeapReference(out);
2685   __ Bind(slow_path->GetExitLabel());
2686 }
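// Note: the two flags tested above are static fields on the declaring class of Reference.get()
// (presumably the slowPathEnabled and disableIntrinsic booleans maintained by the runtime on
// java.lang.ref.Reference). If either is set, the slow path performs the regular call; otherwise
// the referent field is loaded directly, bypassing the read barrier machinery, which is why this
// intrinsic is not emitted under the read barrier configuration.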
2687 
2688 void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
2689   InvokeRuntimeCallingConvention calling_convention;
2690   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2691       invoke,
2692       codegen_,
2693       Location::RegisterLocation(R0),
2694       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2695 }
2696 
2697 void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
2698   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
2699   LocationSummary* locations = invoke->GetLocations();
2700   ArmAssembler* const assembler = GetAssembler();
2701 
2702   Register out = locations->Out().AsRegister<Register>();
2703   InvokeRuntimeCallingConvention calling_convention;
2704   Register argument = calling_convention.GetRegisterAt(0);
2705   if (invoke->InputAt(0)->IsConstant()) {
2706     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2707     if (value >= info.low && value <= info.high) {
2708       // Just embed the j.l.Integer in the code.
2709       ScopedObjectAccess soa(Thread::Current());
2710       mirror::Object* boxed = info.cache->Get(value + (-info.low));
2711       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
2712       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
2713       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
2714     } else {
2715       // Allocate and initialize a new j.l.Integer.
2716       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2717       // JIT object table.
2718       uint32_t address =
2719           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
2720       __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
2721       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2722       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2723       __ LoadImmediate(IP, value);
2724       __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
2725       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2726       // one.
2727       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2728     }
2729   } else {
2730     Register in = locations->InAt(0).AsRegister<Register>();
2731     // Check bounds of our cache.
2732     __ AddConstant(out, in, -info.low);
2733     __ CmpConstant(out, info.high - info.low + 1);
2734     Label allocate, done;
2735     __ b(&allocate, HS);
2736     // If the value is within the bounds, load the j.l.Integer directly from the array.
2737     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
2738     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
2739     __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
2740     codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
2741     __ MaybeUnpoisonHeapReference(out);
2742     __ b(&done);
2743     __ Bind(&allocate);
2744     // Otherwise allocate and initialize a new j.l.Integer.
2745     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
2746     __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
2747     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2748     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2749     __ StoreToOffset(kStoreWord, in, out, info.value_offset);
2750     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2751     // one.
2752     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2753     __ Bind(&done);
2754   }
2755 }
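// Note: the code above mirrors Integer.valueOf() semantics. For values inside
// [info.low, info.high] (typically -128..127) the boxed object is loaded from the boot-image
// IntegerCache array, so repeated calls return the same instance; out-of-range values allocate a
// fresh java.lang.Integer and store the primitive into its `value` field, followed by the
// StoreStore barrier required for final fields.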
2756 
2757 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
2758 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
2759 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
2760 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxFloatFloat)
2761 UNIMPLEMENTED_INTRINSIC(ARM, MathMinLongLong)
2762 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxLongLong)
2763 UNIMPLEMENTED_INTRINSIC(ARM, MathCeil)          // Could be done by changing rounding mode, maybe?
2764 UNIMPLEMENTED_INTRINSIC(ARM, MathFloor)         // Could be done by changing rounding mode, maybe?
2765 UNIMPLEMENTED_INTRINSIC(ARM, MathRint)
2766 UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
2767 UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
2768 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
2769 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
2770 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
2771 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
2772 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
2773 UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)
2774 
2775 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf)
2776 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter)
2777 UNIMPLEMENTED_INTRINSIC(ARM, StringBufferAppend)
2778 UNIMPLEMENTED_INTRINSIC(ARM, StringBufferLength)
2779 UNIMPLEMENTED_INTRINSIC(ARM, StringBufferToString)
2780 UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderAppend)
2781 UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderLength)
2782 UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderToString)
2783 
2784 // 1.8.
2785 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
2786 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
2787 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetInt)
2788 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetLong)
2789 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetObject)
2790 
2791 UNREACHABLE_INTRINSICS(ARM)
2792 
2793 #undef __
2794 
2795 }  // namespace arm
2796 }  // namespace art
2797