1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_arm.h"
18
19 #include "arch/arm/instruction_set_features_arm.h"
20 #include "art_method.h"
21 #include "code_generator_arm.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "intrinsics.h"
24 #include "intrinsics_utils.h"
25 #include "lock_word.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/object_array-inl.h"
28 #include "mirror/reference.h"
29 #include "mirror/string.h"
30 #include "scoped_thread_state_change-inl.h"
31 #include "thread-inl.h"
32 #include "utils/arm/assembler_arm.h"
33
34 namespace art {
35
36 namespace arm {
37
38 ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() {
39 return codegen_->GetAssembler();
40 }
41
42 ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
43 return codegen_->GetGraph()->GetArena();
44 }
45
46 using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
47
48 #define __ assembler->
49
50 // Compute base address for the System.arraycopy intrinsic in `base`.
51 static void GenSystemArrayCopyBaseAddress(ArmAssembler* assembler,
52 Primitive::Type type,
53 const Register& array,
54 const Location& pos,
55 const Register& base) {
56 // This routine is only used by the SystemArrayCopy intrinsic at the
57 // moment. It could also be used for the SystemArrayCopyChar intrinsic
58 // by additionally allowing Primitive::kPrimChar as `type`.
59 DCHECK_EQ(type, Primitive::kPrimNot);
60 const int32_t element_size = Primitive::ComponentSize(type);
61 const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
62 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
63
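// In both cases below the computed address is `array + data_offset + pos * element_size`,
// i.e. the address of the element at index `pos`; with a constant `pos` the whole offset
// folds into a single immediate.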
64 if (pos.IsConstant()) {
65 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
66 __ AddConstant(base, array, element_size * constant + data_offset);
67 } else {
68 __ add(base, array, ShifterOperand(pos.AsRegister<Register>(), LSL, element_size_shift));
69 __ AddConstant(base, data_offset);
70 }
71 }
72
73 // Compute end address for the System.arraycopy intrinsic in `end`.
74 static void GenSystemArrayCopyEndAddress(ArmAssembler* assembler,
75 Primitive::Type type,
76 const Location& copy_length,
77 const Register& base,
78 const Register& end) {
79 // This routine is only used by the SystemArrayCopy intrinsic at the
80 // moment. It could also be used for the SystemArrayCopyChar intrinsic
81 // by additionally allowing Primitive::kPrimChar as `type`.
82 DCHECK_EQ(type, Primitive::kPrimNot);
83 const int32_t element_size = Primitive::ComponentSize(type);
84 const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
85
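// The end address is exclusive: `base + copy_length * element_size`.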
86 if (copy_length.IsConstant()) {
87 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
88 __ AddConstant(end, base, element_size * constant);
89 } else {
90 __ add(end, base, ShifterOperand(copy_length.AsRegister<Register>(), LSL, element_size_shift));
91 }
92 }
93
94 #undef __
95
96 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
97 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
98
99 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
100 class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
101 public:
102 explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
103 : SlowPathCode(instruction) {
104 DCHECK(kEmitCompilerReadBarrier);
105 DCHECK(kUseBakerReadBarrier);
106 }
107
108 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
109 CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
110 ArmAssembler* assembler = arm_codegen->GetAssembler();
111 LocationSummary* locations = instruction_->GetLocations();
112 DCHECK(locations->CanCall());
113 DCHECK(instruction_->IsInvokeStaticOrDirect())
114 << "Unexpected instruction in read barrier arraycopy slow path: "
115 << instruction_->DebugName();
116 DCHECK(instruction_->GetLocations()->Intrinsified());
117 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
118
119 Primitive::Type type = Primitive::kPrimNot;
120 const int32_t element_size = Primitive::ComponentSize(type);
121
122 Register dest = locations->InAt(2).AsRegister<Register>();
123 Location dest_pos = locations->InAt(3);
124 Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
125 Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
126 Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
127 Register tmp = locations->GetTemp(3).AsRegister<Register>();
128
129 __ Bind(GetEntryLabel());
130 // Compute the base destination address in `dst_curr_addr`.
131 GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
132
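// Copy loop: load a reference from the source (post-incrementing `src_curr_addr`),
// mark it through the ReadBarrierMarkRegX entrypoint, then store it to the destination
// (post-incrementing `dst_curr_addr`) until `src_curr_addr` reaches `src_stop_addr`.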
133 Label loop;
134 __ Bind(&loop);
135 __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
136 __ MaybeUnpoisonHeapReference(tmp);
137 // TODO: Inline the mark bit check before calling the runtime?
138 // tmp = ReadBarrier::Mark(tmp);
139 // No need to save live registers; it's taken care of by the
140 // entrypoint. Also, there is no need to update the stack mask,
141 // as this runtime call will not trigger a garbage collection.
142 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
143 // explanations.)
144 DCHECK_NE(tmp, SP);
145 DCHECK_NE(tmp, LR);
146 DCHECK_NE(tmp, PC);
147 // IP is used internally by the ReadBarrierMarkRegX entry point
148 // as a temporary (and not preserved). It thus cannot be used by
149 // any live register in this slow path.
150 DCHECK_NE(src_curr_addr, IP);
151 DCHECK_NE(dst_curr_addr, IP);
152 DCHECK_NE(src_stop_addr, IP);
153 DCHECK_NE(tmp, IP);
154 DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
155 // TODO: Load the entrypoint once before the loop, instead of
156 // loading it at every iteration.
157 int32_t entry_point_offset =
158 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
159 // This runtime call does not require a stack map.
160 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
161 __ MaybePoisonHeapReference(tmp);
162 __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
163 __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
164 __ b(&loop, NE);
165 __ b(GetExitLabel());
166 }
167
168 const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
169
170 private:
171 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
172 };
173
174 #undef __
175
176 IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
177 : arena_(codegen->GetGraph()->GetArena()),
178 codegen_(codegen),
179 assembler_(codegen->GetAssembler()),
180 features_(codegen->GetInstructionSetFeatures()) {}
181
182 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
183 Dispatch(invoke);
184 LocationSummary* res = invoke->GetLocations();
185 if (res == nullptr) {
186 return false;
187 }
188 return res->Intrinsified();
189 }
190
191 #define __ assembler->
192
193 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
194 LocationSummary* locations = new (arena) LocationSummary(invoke,
195 LocationSummary::kNoCall,
196 kIntrinsified);
197 locations->SetInAt(0, Location::RequiresFpuRegister());
198 locations->SetOut(Location::RequiresRegister());
199 }
200
201 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
202 LocationSummary* locations = new (arena) LocationSummary(invoke,
203 LocationSummary::kNoCall,
204 kIntrinsified);
205 locations->SetInAt(0, Location::RequiresRegister());
206 locations->SetOut(Location::RequiresFpuRegister());
207 }
208
209 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
210 Location input = locations->InAt(0);
211 Location output = locations->Out();
212 if (is64bit) {
213 __ vmovrrd(output.AsRegisterPairLow<Register>(),
214 output.AsRegisterPairHigh<Register>(),
215 FromLowSToD(input.AsFpuRegisterPairLow<SRegister>()));
216 } else {
217 __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>());
218 }
219 }
220
221 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
222 Location input = locations->InAt(0);
223 Location output = locations->Out();
224 if (is64bit) {
225 __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()),
226 input.AsRegisterPairLow<Register>(),
227 input.AsRegisterPairHigh<Register>());
228 } else {
229 __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>());
230 }
231 }
232
233 void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
234 CreateFPToIntLocations(arena_, invoke);
235 }
236 void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
237 CreateIntToFPLocations(arena_, invoke);
238 }
239
240 void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
241 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
242 }
243 void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
244 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
245 }
246
247 void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
248 CreateFPToIntLocations(arena_, invoke);
249 }
250 void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
251 CreateIntToFPLocations(arena_, invoke);
252 }
253
254 void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
255 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
256 }
257 void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
258 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
259 }
260
261 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
262 LocationSummary* locations = new (arena) LocationSummary(invoke,
263 LocationSummary::kNoCall,
264 kIntrinsified);
265 locations->SetInAt(0, Location::RequiresRegister());
266 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
267 }
268
269 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
270 LocationSummary* locations = new (arena) LocationSummary(invoke,
271 LocationSummary::kNoCall,
272 kIntrinsified);
273 locations->SetInAt(0, Location::RequiresFpuRegister());
274 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
275 }
276
277 static void GenNumberOfLeadingZeros(HInvoke* invoke,
278 Primitive::Type type,
279 CodeGeneratorARM* codegen) {
280 ArmAssembler* assembler = codegen->GetAssembler();
281 LocationSummary* locations = invoke->GetLocations();
282 Location in = locations->InAt(0);
283 Register out = locations->Out().AsRegister<Register>();
284
285 DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
286
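// For a long, count leading zeros in the high word first; only if the high word is zero
// do the leading zeros extend into the low word, in which case the result is 32 + clz(lo).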
287 if (type == Primitive::kPrimLong) {
288 Register in_reg_lo = in.AsRegisterPairLow<Register>();
289 Register in_reg_hi = in.AsRegisterPairHigh<Register>();
290 Label end;
291 Label* final_label = codegen->GetFinalLabel(invoke, &end);
292 __ clz(out, in_reg_hi);
293 __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
294 __ clz(out, in_reg_lo);
295 __ AddConstant(out, 32);
296 if (end.IsLinked()) {
297 __ Bind(&end);
298 }
299 } else {
300 __ clz(out, in.AsRegister<Register>());
301 }
302 }
303
304 void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
305 CreateIntToIntLocations(arena_, invoke);
306 }
307
308 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
309 GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
310 }
311
312 void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
313 LocationSummary* locations = new (arena_) LocationSummary(invoke,
314 LocationSummary::kNoCall,
315 kIntrinsified);
316 locations->SetInAt(0, Location::RequiresRegister());
317 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
318 }
319
320 void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
321 GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
322 }
323
324 static void GenNumberOfTrailingZeros(HInvoke* invoke,
325 Primitive::Type type,
326 CodeGeneratorARM* codegen) {
327 DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
328
329 ArmAssembler* assembler = codegen->GetAssembler();
330 LocationSummary* locations = invoke->GetLocations();
331 Register out = locations->Out().AsRegister<Register>();
332
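// ARM has no CTZ instruction, so trailing zeros are counted as clz(rbit(x)).
// For a long, the low word is examined first; if it is all zeros, the count continues
// into the high word with 32 added.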
333 if (type == Primitive::kPrimLong) {
334 Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
335 Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
336 Label end;
337 Label* final_label = codegen->GetFinalLabel(invoke, &end);
338 __ rbit(out, in_reg_lo);
339 __ clz(out, out);
340 __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
341 __ rbit(out, in_reg_hi);
342 __ clz(out, out);
343 __ AddConstant(out, 32);
344 if (end.IsLinked()) {
345 __ Bind(&end);
346 }
347 } else {
348 Register in = locations->InAt(0).AsRegister<Register>();
349 __ rbit(out, in);
350 __ clz(out, out);
351 }
352 }
353
354 void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
355 LocationSummary* locations = new (arena_) LocationSummary(invoke,
356 LocationSummary::kNoCall,
357 kIntrinsified);
358 locations->SetInAt(0, Location::RequiresRegister());
359 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
360 }
361
362 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
363 GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
364 }
365
366 void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
367 LocationSummary* locations = new (arena_) LocationSummary(invoke,
368 LocationSummary::kNoCall,
369 kIntrinsified);
370 locations->SetInAt(0, Location::RequiresRegister());
371 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
372 }
373
374 void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
375 GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
376 }
377
378 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
379 Location in = locations->InAt(0);
380 Location out = locations->Out();
381
382 if (is64bit) {
383 __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
384 FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
385 } else {
386 __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>());
387 }
388 }
389
390 void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) {
391 CreateFPToFPLocations(arena_, invoke);
392 }
393
394 void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
395 MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
396 }
397
398 void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
399 CreateFPToFPLocations(arena_, invoke);
400 }
401
402 void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
403 MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
404 }
405
406 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
407 LocationSummary* locations = new (arena) LocationSummary(invoke,
408 LocationSummary::kNoCall,
409 kIntrinsified);
410 locations->SetInAt(0, Location::RequiresRegister());
411 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
412
413 locations->AddTemp(Location::RequiresRegister());
414 }
415
416 static void GenAbsInteger(LocationSummary* locations,
417 bool is64bit,
418 ArmAssembler* assembler) {
419 Location in = locations->InAt(0);
420 Location output = locations->Out();
421
422 Register mask = locations->GetTemp(0).AsRegister<Register>();
423
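// Branch-free absolute value: `mask = value >> 31` (all ones for negative input), then
// `abs = (value + mask) ^ mask`. The 64-bit case uses ADDS/ADC so the carry propagates
// across the register pair.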
424 if (is64bit) {
425 Register in_reg_lo = in.AsRegisterPairLow<Register>();
426 Register in_reg_hi = in.AsRegisterPairHigh<Register>();
427 Register out_reg_lo = output.AsRegisterPairLow<Register>();
428 Register out_reg_hi = output.AsRegisterPairHigh<Register>();
429
430 DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected.";
431
432 __ Asr(mask, in_reg_hi, 31);
433 __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask));
434 __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask));
435 __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo));
436 __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi));
437 } else {
438 Register in_reg = in.AsRegister<Register>();
439 Register out_reg = output.AsRegister<Register>();
440
441 __ Asr(mask, in_reg, 31);
442 __ add(out_reg, in_reg, ShifterOperand(mask));
443 __ eor(out_reg, mask, ShifterOperand(out_reg));
444 }
445 }
446
447 void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) {
448 CreateIntToIntPlusTemp(arena_, invoke);
449 }
450
451 void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
452 GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
453 }
454
455
456 void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) {
457 CreateIntToIntPlusTemp(arena_, invoke);
458 }
459
460 void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
461 GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
462 }
463
464 static void GenMinMax(LocationSummary* locations,
465 bool is_min,
466 ArmAssembler* assembler) {
467 Register op1 = locations->InAt(0).AsRegister<Register>();
468 Register op2 = locations->InAt(1).AsRegister<Register>();
469 Register out = locations->Out().AsRegister<Register>();
470
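// Compare the operands, then use an IT (if-then-else) block to select op1 or op2 into
// `out` without branching.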
471 __ cmp(op1, ShifterOperand(op2));
472
473 __ it((is_min) ? Condition::LT : Condition::GT, kItElse);
474 __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT);
475 __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE);
476 }
477
478 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
479 LocationSummary* locations = new (arena) LocationSummary(invoke,
480 LocationSummary::kNoCall,
481 kIntrinsified);
482 locations->SetInAt(0, Location::RequiresRegister());
483 locations->SetInAt(1, Location::RequiresRegister());
484 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
485 }
486
487 void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) {
488 CreateIntIntToIntLocations(arena_, invoke);
489 }
490
491 void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
492 GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
493 }
494
495 void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
496 CreateIntIntToIntLocations(arena_, invoke);
497 }
498
499 void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
500 GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
501 }
502
503 void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
504 CreateFPToFPLocations(arena_, invoke);
505 }
506
507 void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) {
508 LocationSummary* locations = invoke->GetLocations();
509 ArmAssembler* assembler = GetAssembler();
510 __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
511 FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
512 }
513
514 void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) {
515 CreateIntToIntLocations(arena_, invoke);
516 }
517
518 void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) {
519 ArmAssembler* assembler = GetAssembler();
520 // Ignore upper 4B of long address.
521 __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(),
522 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
523 }
524
525 void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
526 CreateIntToIntLocations(arena_, invoke);
527 }
528
529 void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
530 ArmAssembler* assembler = GetAssembler();
531 // Ignore upper 4B of long address.
532 __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(),
533 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
534 }
535
536 void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
537 CreateIntToIntLocations(arena_, invoke);
538 }
539
540 void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
541 ArmAssembler* assembler = GetAssembler();
542 // Ignore upper 4B of long address.
543 Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
544 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
545 // exception. So we can't use ldrd as addr may be unaligned.
546 Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
547 Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
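// If the output's low register aliases `addr`, load the high word first so the address
// is not clobbered before the second load.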
548 if (addr == lo) {
549 __ ldr(hi, Address(addr, 4));
550 __ ldr(lo, Address(addr, 0));
551 } else {
552 __ ldr(lo, Address(addr, 0));
553 __ ldr(hi, Address(addr, 4));
554 }
555 }
556
557 void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
558 CreateIntToIntLocations(arena_, invoke);
559 }
560
561 void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
562 ArmAssembler* assembler = GetAssembler();
563 // Ignore upper 4B of long address.
564 __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(),
565 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
566 }
567
568 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
569 LocationSummary* locations = new (arena) LocationSummary(invoke,
570 LocationSummary::kNoCall,
571 kIntrinsified);
572 locations->SetInAt(0, Location::RequiresRegister());
573 locations->SetInAt(1, Location::RequiresRegister());
574 }
575
576 void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) {
577 CreateIntIntToVoidLocations(arena_, invoke);
578 }
579
580 void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) {
581 ArmAssembler* assembler = GetAssembler();
582 __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
583 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
584 }
585
586 void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
587 CreateIntIntToVoidLocations(arena_, invoke);
588 }
589
590 void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
591 ArmAssembler* assembler = GetAssembler();
592 __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
593 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
594 }
595
596 void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
597 CreateIntIntToVoidLocations(arena_, invoke);
598 }
599
600 void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
601 ArmAssembler* assembler = GetAssembler();
602 // Ignore upper 4B of long address.
603 Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
604 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
605 // exception. So we can't use ldrd as addr may be unaligned.
606 __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0));
607 __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4));
608 }
609
610 void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
611 CreateIntIntToVoidLocations(arena_, invoke);
612 }
613
614 void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
615 ArmAssembler* assembler = GetAssembler();
616 __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
617 Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
618 }
619
620 void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) {
621 LocationSummary* locations = new (arena_) LocationSummary(invoke,
622 LocationSummary::kNoCall,
623 kIntrinsified);
624 locations->SetOut(Location::RequiresRegister());
625 }
626
627 void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) {
628 ArmAssembler* assembler = GetAssembler();
629 __ LoadFromOffset(kLoadWord,
630 invoke->GetLocations()->Out().AsRegister<Register>(),
631 TR,
632 Thread::PeerOffset<kArmPointerSize>().Int32Value());
633 }
634
635 static void GenUnsafeGet(HInvoke* invoke,
636 Primitive::Type type,
637 bool is_volatile,
638 CodeGeneratorARM* codegen) {
639 LocationSummary* locations = invoke->GetLocations();
640 ArmAssembler* assembler = codegen->GetAssembler();
641 Location base_loc = locations->InAt(1);
642 Register base = base_loc.AsRegister<Register>(); // Object pointer.
643 Location offset_loc = locations->InAt(2);
644 Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only.
645 Location trg_loc = locations->Out();
646
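// For volatile accesses each load below is followed by a DMB ISH, which is what gives
// the load its acquire semantics; the 64-bit case falls back to LDREXD when the core
// lacks atomic LDRD/STRD.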
647 switch (type) {
648 case Primitive::kPrimInt: {
649 Register trg = trg_loc.AsRegister<Register>();
650 __ ldr(trg, Address(base, offset));
651 if (is_volatile) {
652 __ dmb(ISH);
653 }
654 break;
655 }
656
657 case Primitive::kPrimNot: {
658 Register trg = trg_loc.AsRegister<Register>();
659 if (kEmitCompilerReadBarrier) {
660 if (kUseBakerReadBarrier) {
661 Location temp = locations->GetTemp(0);
662 codegen->GenerateReferenceLoadWithBakerReadBarrier(
663 invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
664 if (is_volatile) {
665 __ dmb(ISH);
666 }
667 } else {
668 __ ldr(trg, Address(base, offset));
669 if (is_volatile) {
670 __ dmb(ISH);
671 }
672 codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
673 }
674 } else {
675 __ ldr(trg, Address(base, offset));
676 if (is_volatile) {
677 __ dmb(ISH);
678 }
679 __ MaybeUnpoisonHeapReference(trg);
680 }
681 break;
682 }
683
684 case Primitive::kPrimLong: {
685 Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
686 __ add(IP, base, ShifterOperand(offset));
687 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
688 Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
689 __ ldrexd(trg_lo, trg_hi, IP);
690 } else {
691 __ ldrd(trg_lo, Address(IP));
692 }
693 if (is_volatile) {
694 __ dmb(ISH);
695 }
696 break;
697 }
698
699 default:
700 LOG(FATAL) << "Unexpected type " << type;
701 UNREACHABLE();
702 }
703 }
704
705 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
706 HInvoke* invoke,
707 Primitive::Type type) {
708 bool can_call = kEmitCompilerReadBarrier &&
709 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
710 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
711 LocationSummary* locations = new (arena) LocationSummary(invoke,
712 (can_call
713 ? LocationSummary::kCallOnSlowPath
714 : LocationSummary::kNoCall),
715 kIntrinsified);
716 if (can_call && kUseBakerReadBarrier) {
717 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
718 }
719 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
720 locations->SetInAt(1, Location::RequiresRegister());
721 locations->SetInAt(2, Location::RequiresRegister());
722 locations->SetOut(Location::RequiresRegister(),
723 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
724 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
725 // We need a temporary register for the read barrier marking slow
726 // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
727 locations->AddTemp(Location::RequiresRegister());
728 }
729 }
730
731 void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
732 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
733 }
734 void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
735 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
736 }
737 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
738 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
739 }
740 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
741 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
742 }
743 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
744 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
745 }
746 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
747 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
748 }
749
750 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
751 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
752 }
753 void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
754 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
755 }
756 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
757 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
758 }
759 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
760 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
761 }
762 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
763 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
764 }
765 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
766 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
767 }
768
769 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
770 const ArmInstructionSetFeatures& features,
771 Primitive::Type type,
772 bool is_volatile,
773 HInvoke* invoke) {
774 LocationSummary* locations = new (arena) LocationSummary(invoke,
775 LocationSummary::kNoCall,
776 kIntrinsified);
777 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
778 locations->SetInAt(1, Location::RequiresRegister());
779 locations->SetInAt(2, Location::RequiresRegister());
780 locations->SetInAt(3, Location::RequiresRegister());
781
782 if (type == Primitive::kPrimLong) {
783 // Potentially need temps for ldrexd-strexd loop.
784 if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
785 locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
786 locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
787 }
788 } else if (type == Primitive::kPrimNot) {
789 // Temps for card-marking.
790 locations->AddTemp(Location::RequiresRegister()); // Temp.
791 locations->AddTemp(Location::RequiresRegister()); // Card.
792 }
793 }
794
795 void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
796 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
797 }
798 void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
799 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
800 }
801 void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
802 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
803 }
804 void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
805 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
806 }
807 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
808 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
809 }
810 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
811 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
812 }
813 void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
814 CreateIntIntIntIntToVoid(
815 arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
816 }
817 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
818 CreateIntIntIntIntToVoid(
819 arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
820 }
821 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
822 CreateIntIntIntIntToVoid(
823 arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
824 }
825
826 static void GenUnsafePut(LocationSummary* locations,
827 Primitive::Type type,
828 bool is_volatile,
829 bool is_ordered,
830 CodeGeneratorARM* codegen) {
831 ArmAssembler* assembler = codegen->GetAssembler();
832
833 Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
834 Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
835 Register value;
836
837 if (is_volatile || is_ordered) {
838 __ dmb(ISH);
839 }
840
841 if (type == Primitive::kPrimLong) {
842 Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
843 value = value_lo;
844 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
845 Register temp_lo = locations->GetTemp(0).AsRegister<Register>();
846 Register temp_hi = locations->GetTemp(1).AsRegister<Register>();
847 Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
848
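// Atomic 64-bit store via an LDREXD/STREXD retry loop: the exclusive load claims the
// monitor, STREXD writes 0 to `temp_lo` on success, and the loop retries until the
// store succeeds.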
849 __ add(IP, base, ShifterOperand(offset));
850 Label loop_head;
851 __ Bind(&loop_head);
852 __ ldrexd(temp_lo, temp_hi, IP);
853 __ strexd(temp_lo, value_lo, value_hi, IP);
854 __ cmp(temp_lo, ShifterOperand(0));
855 __ b(&loop_head, NE);
856 } else {
857 __ add(IP, base, ShifterOperand(offset));
858 __ strd(value_lo, Address(IP));
859 }
860 } else {
861 value = locations->InAt(3).AsRegister<Register>();
862 Register source = value;
863 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
864 Register temp = locations->GetTemp(0).AsRegister<Register>();
865 __ Mov(temp, value);
866 __ PoisonHeapReference(temp);
867 source = temp;
868 }
869 __ str(source, Address(base, offset));
870 }
871
872 if (is_volatile) {
873 __ dmb(ISH);
874 }
875
876 if (type == Primitive::kPrimNot) {
877 Register temp = locations->GetTemp(0).AsRegister<Register>();
878 Register card = locations->GetTemp(1).AsRegister<Register>();
879 bool value_can_be_null = true; // TODO: Worth finding out this information?
880 codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
881 }
882 }
883
884 void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
885 GenUnsafePut(invoke->GetLocations(),
886 Primitive::kPrimInt,
887 /* is_volatile */ false,
888 /* is_ordered */ false,
889 codegen_);
890 }
891 void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
892 GenUnsafePut(invoke->GetLocations(),
893 Primitive::kPrimInt,
894 /* is_volatile */ false,
895 /* is_ordered */ true,
896 codegen_);
897 }
898 void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
899 GenUnsafePut(invoke->GetLocations(),
900 Primitive::kPrimInt,
901 /* is_volatile */ true,
902 /* is_ordered */ false,
903 codegen_);
904 }
905 void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
906 GenUnsafePut(invoke->GetLocations(),
907 Primitive::kPrimNot,
908 /* is_volatile */ false,
909 /* is_ordered */ false,
910 codegen_);
911 }
912 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
913 GenUnsafePut(invoke->GetLocations(),
914 Primitive::kPrimNot,
915 /* is_volatile */ false,
916 /* is_ordered */ true,
917 codegen_);
918 }
919 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
920 GenUnsafePut(invoke->GetLocations(),
921 Primitive::kPrimNot,
922 /* is_volatile */ true,
923 /* is_ordered */ false,
924 codegen_);
925 }
926 void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
927 GenUnsafePut(invoke->GetLocations(),
928 Primitive::kPrimLong,
929 /* is_volatile */ false,
930 /* is_ordered */ false,
931 codegen_);
932 }
933 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
934 GenUnsafePut(invoke->GetLocations(),
935 Primitive::kPrimLong,
936 /* is_volatile */ false,
937 /* is_ordered */ true,
938 codegen_);
939 }
940 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
941 GenUnsafePut(invoke->GetLocations(),
942 Primitive::kPrimLong,
943 /* is_volatile */ true,
944 /* is_ordered */ false,
945 codegen_);
946 }
947
948 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
949 HInvoke* invoke,
950 Primitive::Type type) {
951 bool can_call = kEmitCompilerReadBarrier &&
952 kUseBakerReadBarrier &&
953 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
954 LocationSummary* locations = new (arena) LocationSummary(invoke,
955 (can_call
956 ? LocationSummary::kCallOnSlowPath
957 : LocationSummary::kNoCall),
958 kIntrinsified);
959 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
960 locations->SetInAt(1, Location::RequiresRegister());
961 locations->SetInAt(2, Location::RequiresRegister());
962 locations->SetInAt(3, Location::RequiresRegister());
963 locations->SetInAt(4, Location::RequiresRegister());
964
965 // If heap poisoning is enabled, we don't want the unpoisoning
966 // operations to potentially clobber the output. Likewise when
967 // emitting a (Baker) read barrier, which may call.
968 Location::OutputOverlap overlaps =
969 ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
970 ? Location::kOutputOverlap
971 : Location::kNoOutputOverlap;
972 locations->SetOut(Location::RequiresRegister(), overlaps);
973
974 // Temporary registers used in CAS. In the object case
975 // (UnsafeCASObject intrinsic), these are also used for
976 // card-marking, and possibly for (Baker) read barrier.
977 locations->AddTemp(Location::RequiresRegister()); // Pointer.
978 locations->AddTemp(Location::RequiresRegister()); // Temp 1.
979 }
980
981 static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) {
982 DCHECK_NE(type, Primitive::kPrimLong);
983
984 ArmAssembler* assembler = codegen->GetAssembler();
985 LocationSummary* locations = invoke->GetLocations();
986
987 Location out_loc = locations->Out();
988 Register out = out_loc.AsRegister<Register>(); // Boolean result.
989
990 Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
991 Location offset_loc = locations->InAt(2);
992 Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B).
993 Register expected = locations->InAt(3).AsRegister<Register>(); // Expected.
994 Register value = locations->InAt(4).AsRegister<Register>(); // Value.
995
996 Location tmp_ptr_loc = locations->GetTemp(0);
997 Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory.
998 Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
999
1000 if (type == Primitive::kPrimNot) {
1001 // The only read barrier implementation supporting the
1002 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1003 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1004
1005 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
1006 // object and scan the receiver at the next GC for nothing.
1007 bool value_can_be_null = true; // TODO: Worth finding out this information?
1008 codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1009
1010 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1011 // Need to make sure the reference stored in the field is a to-space
1012 // one before attempting the CAS or the CAS could fail incorrectly.
1013 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1014 invoke,
1015 out_loc, // Unused, used only as a "temporary" within the read barrier.
1016 base,
1017 /* offset */ 0u,
1018 /* index */ offset_loc,
1019 ScaleFactor::TIMES_1,
1020 tmp_ptr_loc,
1021 /* needs_null_check */ false,
1022 /* always_update_field */ true,
1023 &tmp);
1024 }
1025 }
1026
1027 // Prevent reordering with prior memory operations.
1028 // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
1029 // latter allows a preceding load to be delayed past the STXR
1030 // instruction below.
1031 __ dmb(ISH);
1032
1033 __ add(tmp_ptr, base, ShifterOperand(offset));
1034
1035 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1036 __ PoisonHeapReference(expected);
1037 if (value == expected) {
1038 // Do not poison `value`, as it is the same register as
1039 // `expected`, which has just been poisoned.
1040 } else {
1041 __ PoisonHeapReference(value);
1042 }
1043 }
1044
1045 // do {
1046 // tmp = [r_ptr] - expected;
1047 // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1048 // result = tmp != 0;
1049
1050 Label loop_head;
1051 __ Bind(&loop_head);
1052
1053 __ ldrex(tmp, tmp_ptr);
1054
1055 __ subs(tmp, tmp, ShifterOperand(expected));
1056
1057 __ it(EQ, ItState::kItT);
1058 __ strex(tmp, value, tmp_ptr, EQ);
1059 __ cmp(tmp, ShifterOperand(1), EQ);
1060
1061 __ b(&loop_head, EQ);
1062
1063 __ dmb(ISH);
1064
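// Convert `tmp` (which holds `[r_ptr] - expected` after the loop) into a boolean:
// RSBS computes `out = 1 - tmp` and the conditional MOV clears `out` when that
// subtraction borrows, so `out` ends up as 1 only when `tmp` was 0 (the CAS succeeded).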
1065 __ rsbs(out, tmp, ShifterOperand(1));
1066 __ it(CC);
1067 __ mov(out, ShifterOperand(0), CC);
1068
1069 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1070 __ UnpoisonHeapReference(expected);
1071 if (value == expected) {
1072 // Do not unpoison `value`, as it is the same register as
1073 // `expected`, which has just been unpoisoned.
1074 } else {
1075 __ UnpoisonHeapReference(value);
1076 }
1077 }
1078 }
1079
1080 void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
1081 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
1082 }
1083 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
1084 // The only read barrier implementation supporting the
1085 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1086 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1087 return;
1088 }
1089
1090 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
1091 }
1092 void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
1093 GenCas(invoke, Primitive::kPrimInt, codegen_);
1094 }
1095 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
1096 // The only read barrier implementation supporting the
1097 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1098 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1099
1100 GenCas(invoke, Primitive::kPrimNot, codegen_);
1101 }
1102
1103 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
1104 // The inputs plus temporaries for the comparison loops.
1105 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1106 invoke->InputAt(1)->CanBeNull()
1107 ? LocationSummary::kCallOnSlowPath
1108 : LocationSummary::kNoCall,
1109 kIntrinsified);
1110 locations->SetInAt(0, Location::RequiresRegister());
1111 locations->SetInAt(1, Location::RequiresRegister());
1112 locations->AddTemp(Location::RequiresRegister());
1113 locations->AddTemp(Location::RequiresRegister());
1114 locations->AddTemp(Location::RequiresRegister());
1115 // Need an extra temporary register for the string compression feature.
1116 if (mirror::kUseStringCompression) {
1117 locations->AddTemp(Location::RequiresRegister());
1118 }
1119 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1120 }
1121
1122 void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
1123 ArmAssembler* assembler = GetAssembler();
1124 LocationSummary* locations = invoke->GetLocations();
1125
1126 Register str = locations->InAt(0).AsRegister<Register>();
1127 Register arg = locations->InAt(1).AsRegister<Register>();
1128 Register out = locations->Out().AsRegister<Register>();
1129
1130 Register temp0 = locations->GetTemp(0).AsRegister<Register>();
1131 Register temp1 = locations->GetTemp(1).AsRegister<Register>();
1132 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
1133 Register temp3;
1134 if (mirror::kUseStringCompression) {
1135 temp3 = locations->GetTemp(3).AsRegister<Register>();
1136 }
1137
1138 Label loop;
1139 Label find_char_diff;
1140 Label end;
1141 Label different_compression;
1142
1143 // Get offsets of count and value fields within a string object.
1144 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1145 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1146
1147 // Note that the null check must have been done earlier.
1148 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1149
1150 // Take slow path and throw if input can be and is null.
1151 SlowPathCode* slow_path = nullptr;
1152 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1153 if (can_slow_path) {
1154 slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1155 codegen_->AddSlowPath(slow_path);
1156 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1157 }
1158
1159 // Reference equality check, return 0 if same reference.
1160 __ subs(out, str, ShifterOperand(arg));
1161 __ b(&end, EQ);
1162
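// With string compression enabled, the `count` field holds (length << 1) | flag, where
// the low bit is 0 for compressed (8-bit) data and 1 for uncompressed (16-bit) data,
// hence the LSR by 1 below to recover the actual lengths.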
1163 if (mirror::kUseStringCompression) {
1164 // Load `count` fields of this and argument strings.
1165 __ ldr(temp3, Address(str, count_offset));
1166 __ ldr(temp2, Address(arg, count_offset));
1167 // Extract lengths from the `count` fields.
1168 __ Lsr(temp0, temp3, 1u);
1169 __ Lsr(temp1, temp2, 1u);
1170 } else {
1171 // Load lengths of this and argument strings.
1172 __ ldr(temp0, Address(str, count_offset));
1173 __ ldr(temp1, Address(arg, count_offset));
1174 }
1175 // out = length diff.
1176 __ subs(out, temp0, ShifterOperand(temp1));
1177 // temp0 = min(len(str), len(arg)).
1178 __ it(GT);
1179 __ mov(temp0, ShifterOperand(temp1), GT);
1180 // Shorter string is empty?
1181 __ CompareAndBranchIfZero(temp0, &end);
1182
1183 if (mirror::kUseStringCompression) {
1184 // Check whether both strings use the same compression style; only then can this comparison loop be used.
1185 __ eor(temp2, temp2, ShifterOperand(temp3));
1186 __ Lsrs(temp2, temp2, 1u);
1187 __ b(&different_compression, CS);
1188 // For string compression, calculate the number of bytes to compare (not chars).
1189 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1190 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
1191 __ it(NE);
1192 __ add(temp0, temp0, ShifterOperand(temp0), NE);
1193 }
1194
1195 // Store offset of string value in preparation for comparison loop.
1196 __ mov(temp1, ShifterOperand(value_offset));
1197
1198 // Assertions that must hold in order to compare multiple characters at a time.
1199 CHECK_ALIGNED(value_offset, 8);
1200 static_assert(IsAligned<8>(kObjectAlignment),
1201 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1202
1203 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1204 DCHECK_EQ(char_size, 2u);
1205
1206 Label find_char_diff_2nd_cmp;
1207 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1208 __ Bind(&loop);
1209 __ ldr(IP, Address(str, temp1));
1210 __ ldr(temp2, Address(arg, temp1));
1211 __ cmp(IP, ShifterOperand(temp2));
1212 __ b(&find_char_diff, NE);
1213 __ add(temp1, temp1, ShifterOperand(char_size * 2));
1214
1215 __ ldr(IP, Address(str, temp1));
1216 __ ldr(temp2, Address(arg, temp1));
1217 __ cmp(IP, ShifterOperand(temp2));
1218 __ b(&find_char_diff_2nd_cmp, NE);
1219 __ add(temp1, temp1, ShifterOperand(char_size * 2));
1220 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1221 __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
1222 __ b(&loop, HI);
1223 __ b(&end);
1224
1225 __ Bind(&find_char_diff_2nd_cmp);
1226 if (mirror::kUseStringCompression) {
1227 __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared.
1228 __ b(&end, LS); // Was the second comparison fully beyond the end?
1229 } else {
1230 // Without string compression, we can start treating temp0 as signed
1231 // and rely on the signed comparison below.
1232 __ sub(temp0, temp0, ShifterOperand(2));
1233 }
1234
1235 // Find the single character difference.
1236 __ Bind(&find_char_diff);
1237 // Get the bit position of the first character that differs.
1238 __ eor(temp1, temp2, ShifterOperand(IP));
1239 __ rbit(temp1, temp1);
1240 __ clz(temp1, temp1);
1241
1242 // temp0 = number of characters remaining to compare.
1243 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1244 // in the comparison loop, and after the end of the shorter string data).
1245
1246 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1247 // two words compared, in the interval [0,1].
1248 // (0 for low half-word different, 1 for high half-word different).
1249 // With string compression, (temp1 << 3) = byte where the difference occurs,
1250 // in the interval [0,3].
1251
1252 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1253 // the remaining string data, so just return length diff (out).
1254 // The comparison is unsigned for string compression, otherwise signed.
1255 __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
1256 __ b(&end, mirror::kUseStringCompression ? LS : LE);
1257
1258 // Extract the characters and calculate the difference.
1259 if (mirror::kUseStringCompression) {
1260 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1261 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1262 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1263 __ orr(temp3, temp3, ShifterOperand(0xffu << 23)); // uncompressed ? 0xff800000u : 0x7ff80000u
1264 __ bic(temp1, temp1, ShifterOperand(temp3, LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
1265 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1266 __ Lsr(temp2, temp2, temp1); // Extract second character.
1267 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1268 __ Lsr(out, IP, temp1); // Extract first character.
1269 __ and_(temp2, temp2, ShifterOperand(temp3));
1270 __ and_(out, out, ShifterOperand(temp3));
1271 } else {
1272 __ bic(temp1, temp1, ShifterOperand(0xf));
1273 __ Lsr(temp2, temp2, temp1);
1274 __ Lsr(out, IP, temp1);
1275 __ movt(temp2, 0);
1276 __ movt(out, 0);
1277 }
1278
1279 __ sub(out, out, ShifterOperand(temp2));
1280
1281 if (mirror::kUseStringCompression) {
1282 __ b(&end);
1283 __ Bind(&different_compression);
1284
1285 // Comparison for different compression style.
1286 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1287 DCHECK_EQ(c_char_size, 1u);
1288
1289 // We want to free up temp3, currently holding `str.count`, for comparison.
1290 // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1291 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1292 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1293 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
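  // Informal sketch of the flag move (illustrative, assuming kCompressed == 0 as asserted below):
  //   temp0 = 2 * temp0              // the ADD frees up bit 0 of the iteration count,
  //   carry = temp3 & 1              // the LSRS moves `str`'s compression flag into the carry,
  //   temp0 = temp0 - 0 - !carry     // and the SBC stores the inverted flag in bit 0 of temp0.
  // Since the two strings differ in compression, the inverted `str` flag equals the `arg` flag.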
1294 __ add(temp0, temp0, ShifterOperand(temp0)); // Unlike LSL, this ADD is always 16-bit.
1295 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1296 __ mov(temp1, ShifterOperand(str));
1297 __ mov(temp2, ShifterOperand(arg));
1298 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1299 __ it(CS, kItThen); // Interleave with selection of temp1 and temp2.
1300 __ mov(temp1, ShifterOperand(arg), CS); // Preserves flags.
1301 __ mov(temp2, ShifterOperand(str), CS); // Preserves flags.
1302 __ sbc(temp0, temp0, ShifterOperand(0)); // Complete the move of the compression flag.
1303
1304 // Adjust temp1 and temp2 from string pointers to data pointers.
1305 __ add(temp1, temp1, ShifterOperand(value_offset));
1306 __ add(temp2, temp2, ShifterOperand(value_offset));
1307
1308 Label different_compression_loop;
1309 Label different_compression_diff;
1310
1311 // Main loop for different compression.
1312 __ Bind(&different_compression_loop);
1313 __ ldrb(IP, Address(temp1, c_char_size, Address::PostIndex));
1314 __ ldrh(temp3, Address(temp2, char_size, Address::PostIndex));
1315 __ cmp(IP, ShifterOperand(temp3));
1316 __ b(&different_compression_diff, NE);
1317 __ subs(temp0, temp0, ShifterOperand(2));
1318 __ b(&different_compression_loop, HI);
1319 __ b(&end);
1320
1321 // Calculate the difference.
1322 __ Bind(&different_compression_diff);
1323 __ sub(out, IP, ShifterOperand(temp3));
1324 // Flip the difference if the `arg` is compressed.
1325 // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1326 __ Lsrs(temp0, temp0, 1u);
1327 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1328 "Expecting 0=compressed, 1=uncompressed");
1329 __ it(CC);
1330 __ rsb(out, out, ShifterOperand(0), CC);
1331 }
1332
1333 __ Bind(&end);
1334
1335 if (can_slow_path) {
1336 __ Bind(slow_path->GetExitLabel());
1337 }
1338 }
1339
VisitStringEquals(HInvoke * invoke)1340 void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
1341 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1342 LocationSummary::kNoCall,
1343 kIntrinsified);
1344 InvokeRuntimeCallingConvention calling_convention;
1345 locations->SetInAt(0, Location::RequiresRegister());
1346 locations->SetInAt(1, Location::RequiresRegister());
1347 // Temporary registers to store lengths of strings and for calculations.
1348 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
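  // (Thumb CBZ/CBNZ can only encode the low registers R0-R7, hence pinning this temp to R0.)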
1349 locations->AddTemp(Location::RegisterLocation(R0));
1350 locations->AddTemp(Location::RequiresRegister());
1351 locations->AddTemp(Location::RequiresRegister());
1352
1353 locations->SetOut(Location::RequiresRegister());
1354 }
1355
VisitStringEquals(HInvoke * invoke)1356 void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
1357 ArmAssembler* assembler = GetAssembler();
1358 LocationSummary* locations = invoke->GetLocations();
1359
1360 Register str = locations->InAt(0).AsRegister<Register>();
1361 Register arg = locations->InAt(1).AsRegister<Register>();
1362 Register out = locations->Out().AsRegister<Register>();
1363
1364 Register temp = locations->GetTemp(0).AsRegister<Register>();
1365 Register temp1 = locations->GetTemp(1).AsRegister<Register>();
1366 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
1367
1368 Label loop;
1369 Label end;
1370 Label return_true;
1371 Label return_false;
1372 Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1373
1374 // Get offsets of count, value, and class fields within a string object.
1375 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1376 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1377 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1378
1379 // Note that the null check must have been done earlier.
1380 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1381
1382 StringEqualsOptimizations optimizations(invoke);
1383 if (!optimizations.GetArgumentNotNull()) {
1384 // Check if input is null, return false if it is.
1385 __ CompareAndBranchIfZero(arg, &return_false);
1386 }
1387
1388 // Reference equality check, return true if same reference.
1389 __ cmp(str, ShifterOperand(arg));
1390 __ b(&return_true, EQ);
1391
1392 if (!optimizations.GetArgumentIsString()) {
1393 // Instanceof check for the argument by comparing class fields.
1394 // All string objects must have the same type since String cannot be subclassed.
1395 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1396 // If the argument is a string object, its class field must be equal to receiver's class field.
1397 __ ldr(temp, Address(str, class_offset));
1398 __ ldr(temp1, Address(arg, class_offset));
1399 __ cmp(temp, ShifterOperand(temp1));
1400 __ b(&return_false, NE);
1401 }
1402
1403 // Load `count` fields of this and argument strings.
1404 __ ldr(temp, Address(str, count_offset));
1405 __ ldr(temp1, Address(arg, count_offset));
1406 // Check if `count` fields are equal, return false if they're not.
1407 // This also compares the compression style; if it differs, return false.
1408 __ cmp(temp, ShifterOperand(temp1));
1409 __ b(&return_false, NE);
1410 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1411 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1412 "Expecting 0=compressed, 1=uncompressed");
1413 __ cbz(temp, &return_true);
1414
1415 // Assertions that must hold in order to compare strings 4 bytes at a time.
1416 DCHECK_ALIGNED(value_offset, 4);
1417 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1418
1419 if (mirror::kUseStringCompression) {
1420 // For string compression, calculate the number of bytes to compare (not chars).
1421 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1422 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1423 __ it(CS); // If uncompressed,
1424 __ add(temp, temp, ShifterOperand(temp), CS); // double the byte count.
1425 }
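  // Note (informal): with compression enabled, `count` holds the character count in its upper
  // bits and the compression flag in bit 0 (0 = compressed, as asserted above). The LSRS above
  // extracts the length and moves the flag into the carry, and the conditional ADD doubles the
  // count to a byte count for uncompressed data.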
1426
1427 // Store offset of string value in preparation for comparison loop.
1428 __ LoadImmediate(temp1, value_offset);
1429
1430 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1431 // Ok to do this because strings are zero-padded to kObjectAlignment.
1432 __ Bind(&loop);
1433 __ ldr(out, Address(str, temp1));
1434 __ ldr(temp2, Address(arg, temp1));
1435 __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
1436 __ cmp(out, ShifterOperand(temp2));
1437 __ b(&return_false, NE);
1438 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1439 __ subs(temp, temp, ShifterOperand(mirror::kUseStringCompression ? 4 : 2));
1440 __ b(&loop, HI);
1441
1442 // Return true and exit the function.
1443 // If the loop did not result in returning false, we return true.
1444 __ Bind(&return_true);
1445 __ LoadImmediate(out, 1);
1446 __ b(final_label);
1447
1448 // Return false and exit the function.
1449 __ Bind(&return_false);
1450 __ LoadImmediate(out, 0);
1451
1452 if (end.IsLinked()) {
1453 __ Bind(&end);
1454 }
1455 }
1456
GenerateVisitStringIndexOf(HInvoke * invoke,ArmAssembler * assembler,CodeGeneratorARM * codegen,ArenaAllocator * allocator,bool start_at_zero)1457 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1458 ArmAssembler* assembler,
1459 CodeGeneratorARM* codegen,
1460 ArenaAllocator* allocator,
1461 bool start_at_zero) {
1462 LocationSummary* locations = invoke->GetLocations();
1463
1464 // Note that the null check must have been done earlier.
1465 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1466
1467 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1468 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1469 SlowPathCode* slow_path = nullptr;
1470 HInstruction* code_point = invoke->InputAt(1);
1471 if (code_point->IsIntConstant()) {
1472 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1473 std::numeric_limits<uint16_t>::max()) {
1474 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1475 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1476 slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
1477 codegen->AddSlowPath(slow_path);
1478 __ b(slow_path->GetEntryLabel());
1479 __ Bind(slow_path->GetExitLabel());
1480 return;
1481 }
1482 } else if (code_point->GetType() != Primitive::kPrimChar) {
1483 Register char_reg = locations->InAt(1).AsRegister<Register>();
1484 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
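  // (0x10000 has a single bit set and can be encoded as an ARM/Thumb-2 modified immediate,
  // whereas 0xffff cannot, so comparing with HS against 0x10000 avoids materializing 0xffff.)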
1485 __ cmp(char_reg,
1486 ShifterOperand(static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1));
1487 slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
1488 codegen->AddSlowPath(slow_path);
1489 __ b(slow_path->GetEntryLabel(), HS);
1490 }
1491
1492 if (start_at_zero) {
1493 Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
1494 DCHECK_EQ(tmp_reg, R2);
1495 // Start-index = 0.
1496 __ LoadImmediate(tmp_reg, 0);
1497 }
1498
1499 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1500 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1501
1502 if (slow_path != nullptr) {
1503 __ Bind(slow_path->GetExitLabel());
1504 }
1505 }
1506
VisitStringIndexOf(HInvoke * invoke)1507 void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
1508 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1509 LocationSummary::kCallOnMainAndSlowPath,
1510 kIntrinsified);
1511 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1512 // best to align the inputs accordingly.
1513 InvokeRuntimeCallingConvention calling_convention;
1514 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1515 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1516 locations->SetOut(Location::RegisterLocation(R0));
1517
1518 // Need to send start-index=0.
1519 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1520 }
1521
VisitStringIndexOf(HInvoke * invoke)1522 void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
1523 GenerateVisitStringIndexOf(
1524 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1525 }
1526
VisitStringIndexOfAfter(HInvoke * invoke)1527 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
1528 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1529 LocationSummary::kCallOnMainAndSlowPath,
1530 kIntrinsified);
1531 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1532 // best to align the inputs accordingly.
1533 InvokeRuntimeCallingConvention calling_convention;
1534 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1535 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1536 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1537 locations->SetOut(Location::RegisterLocation(R0));
1538 }
1539
VisitStringIndexOfAfter(HInvoke * invoke)1540 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
1541 GenerateVisitStringIndexOf(
1542 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1543 }
1544
VisitStringNewStringFromBytes(HInvoke * invoke)1545 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
1546 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1547 LocationSummary::kCallOnMainAndSlowPath,
1548 kIntrinsified);
1549 InvokeRuntimeCallingConvention calling_convention;
1550 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1551 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1552 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1553 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1554 locations->SetOut(Location::RegisterLocation(R0));
1555 }
1556
VisitStringNewStringFromBytes(HInvoke * invoke)1557 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
1558 ArmAssembler* assembler = GetAssembler();
1559 LocationSummary* locations = invoke->GetLocations();
1560
1561 Register byte_array = locations->InAt(0).AsRegister<Register>();
1562 __ cmp(byte_array, ShifterOperand(0));
1563 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1564 codegen_->AddSlowPath(slow_path);
1565 __ b(slow_path->GetEntryLabel(), EQ);
1566
1567 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1568 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1569 __ Bind(slow_path->GetExitLabel());
1570 }
1571
VisitStringNewStringFromChars(HInvoke * invoke)1572 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
1573 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1574 LocationSummary::kCallOnMainOnly,
1575 kIntrinsified);
1576 InvokeRuntimeCallingConvention calling_convention;
1577 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1578 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1579 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1580 locations->SetOut(Location::RegisterLocation(R0));
1581 }
1582
VisitStringNewStringFromChars(HInvoke * invoke)1583 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
1584 // No need to emit code checking whether `locations->InAt(2)` is a null
1585 // pointer, as callers of the native method
1586 //
1587 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1588 //
1589 // all include a null check on `data` before calling that method.
1590 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1591 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1592 }
1593
VisitStringNewStringFromString(HInvoke * invoke)1594 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
1595 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1596 LocationSummary::kCallOnMainAndSlowPath,
1597 kIntrinsified);
1598 InvokeRuntimeCallingConvention calling_convention;
1599 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1600 locations->SetOut(Location::RegisterLocation(R0));
1601 }
1602
VisitStringNewStringFromString(HInvoke * invoke)1603 void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) {
1604 ArmAssembler* assembler = GetAssembler();
1605 LocationSummary* locations = invoke->GetLocations();
1606
1607 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1608 __ cmp(string_to_copy, ShifterOperand(0));
1609 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1610 codegen_->AddSlowPath(slow_path);
1611 __ b(slow_path->GetEntryLabel(), EQ);
1612
1613 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1614 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1615
1616 __ Bind(slow_path->GetExitLabel());
1617 }
1618
VisitSystemArrayCopy(HInvoke * invoke)1619 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
1620 // The only read barrier implementation supporting the
1621 // SystemArrayCopy intrinsic is the Baker-style read barrier.
1622 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1623 return;
1624 }
1625
1626 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1627 LocationSummary* locations = invoke->GetLocations();
1628 if (locations == nullptr) {
1629 return;
1630 }
1631
1632 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1633 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1634 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1635
1636 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1637 locations->SetInAt(1, Location::RequiresRegister());
1638 }
1639 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1640 locations->SetInAt(3, Location::RequiresRegister());
1641 }
1642 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1643 locations->SetInAt(4, Location::RequiresRegister());
1644 }
1645 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1646 // Temporary register IP cannot be used in
1647 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1648 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1649 // temporary register from the register allocator.
1650 locations->AddTemp(Location::RequiresRegister());
1651 }
1652 }
1653
CheckPosition(ArmAssembler * assembler,Location pos,Register input,Location length,SlowPathCode * slow_path,Register temp,bool length_is_input_length=false)1654 static void CheckPosition(ArmAssembler* assembler,
1655 Location pos,
1656 Register input,
1657 Location length,
1658 SlowPathCode* slow_path,
1659 Register temp,
1660 bool length_is_input_length = false) {
1661 // Where is the length in the Array?
1662 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
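  // Roughly, the checks emitted below amount to (a sketch, not original code):
  //   if (pos < 0 || pos > length(input) || length(input) - pos < length) goto slow_path;
  // with the constant-`pos` and `length_is_input_length` cases specialized to skip whatever
  // is statically known to hold.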
1663
1664 if (pos.IsConstant()) {
1665 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1666 if (pos_const == 0) {
1667 if (!length_is_input_length) {
1668 // Check that length(input) >= length.
1669 __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1670 if (length.IsConstant()) {
1671 __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1672 } else {
1673 __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1674 }
1675 __ b(slow_path->GetEntryLabel(), LT);
1676 }
1677 } else {
1678 // Check that length(input) >= pos.
1679 __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1680 __ subs(temp, temp, ShifterOperand(pos_const));
1681 __ b(slow_path->GetEntryLabel(), LT);
1682
1683 // Check that (length(input) - pos) >= length.
1684 if (length.IsConstant()) {
1685 __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1686 } else {
1687 __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1688 }
1689 __ b(slow_path->GetEntryLabel(), LT);
1690 }
1691 } else if (length_is_input_length) {
1692 // The only way the copy can succeed is if pos is zero.
1693 Register pos_reg = pos.AsRegister<Register>();
1694 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1695 } else {
1696 // Check that pos >= 0.
1697 Register pos_reg = pos.AsRegister<Register>();
1698 __ cmp(pos_reg, ShifterOperand(0));
1699 __ b(slow_path->GetEntryLabel(), LT);
1700
1701 // Check that pos <= length(input).
1702 __ LoadFromOffset(kLoadWord, temp, input, length_offset);
1703 __ subs(temp, temp, ShifterOperand(pos_reg));
1704 __ b(slow_path->GetEntryLabel(), LT);
1705
1706 // Check that (length(input) - pos) >= length.
1707 if (length.IsConstant()) {
1708 __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue()));
1709 } else {
1710 __ cmp(temp, ShifterOperand(length.AsRegister<Register>()));
1711 }
1712 __ b(slow_path->GetEntryLabel(), LT);
1713 }
1714 }
1715
VisitSystemArrayCopy(HInvoke * invoke)1716 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
1717 // The only read barrier implementation supporting the
1718 // SystemArrayCopy intrinsic is the Baker-style read barrier.
1719 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1720
1721 ArmAssembler* assembler = GetAssembler();
1722 LocationSummary* locations = invoke->GetLocations();
1723
1724 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1725 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1726 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1727 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1728 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1729
1730 Register src = locations->InAt(0).AsRegister<Register>();
1731 Location src_pos = locations->InAt(1);
1732 Register dest = locations->InAt(2).AsRegister<Register>();
1733 Location dest_pos = locations->InAt(3);
1734 Location length = locations->InAt(4);
1735 Location temp1_loc = locations->GetTemp(0);
1736 Register temp1 = temp1_loc.AsRegister<Register>();
1737 Location temp2_loc = locations->GetTemp(1);
1738 Register temp2 = temp2_loc.AsRegister<Register>();
1739 Location temp3_loc = locations->GetTemp(2);
1740 Register temp3 = temp3_loc.AsRegister<Register>();
1741
1742 SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
1743 codegen_->AddSlowPath(intrinsic_slow_path);
1744
1745 Label conditions_on_positions_validated;
1746 SystemArrayCopyOptimizations optimizations(invoke);
1747
1748 // If source and destination are the same, we go to slow path if we need to do
1749 // forward copying.
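  // Informal sketch (not original code): the checks below boil down to
  //   if (src == dest && src_pos < dest_pos) goto slow_path;
  // since the fast path copies forward and would otherwise overwrite source elements that
  // have not been copied yet.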
1750 if (src_pos.IsConstant()) {
1751 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1752 if (dest_pos.IsConstant()) {
1753 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1754 if (optimizations.GetDestinationIsSource()) {
1755 // Checked when building locations.
1756 DCHECK_GE(src_pos_constant, dest_pos_constant);
1757 } else if (src_pos_constant < dest_pos_constant) {
1758 __ cmp(src, ShifterOperand(dest));
1759 __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
1760 }
1761
1762 // Checked when building locations.
1763 DCHECK(!optimizations.GetDestinationIsSource()
1764 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
1765 } else {
1766 if (!optimizations.GetDestinationIsSource()) {
1767 __ cmp(src, ShifterOperand(dest));
1768 __ b(&conditions_on_positions_validated, NE);
1769 }
1770 __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
1771 __ b(intrinsic_slow_path->GetEntryLabel(), GT);
1772 }
1773 } else {
1774 if (!optimizations.GetDestinationIsSource()) {
1775 __ cmp(src, ShifterOperand(dest));
1776 __ b(&conditions_on_positions_validated, NE);
1777 }
1778 if (dest_pos.IsConstant()) {
1779 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1780 __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos_constant));
1781 } else {
1782 __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
1783 }
1784 __ b(intrinsic_slow_path->GetEntryLabel(), LT);
1785 }
1786
1787 __ Bind(&conditions_on_positions_validated);
1788
1789 if (!optimizations.GetSourceIsNotNull()) {
1790 // Bail out if the source is null.
1791 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1792 }
1793
1794 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1795 // Bail out if the destination is null.
1796 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1797 }
1798
1799 // If the length is negative, bail out.
1800 // We have already checked in the LocationsBuilder for the constant case.
1801 if (!length.IsConstant() &&
1802 !optimizations.GetCountIsSourceLength() &&
1803 !optimizations.GetCountIsDestinationLength()) {
1804 __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
1805 __ b(intrinsic_slow_path->GetEntryLabel(), LT);
1806 }
1807
1808 // Validity checks: source.
1809 CheckPosition(assembler,
1810 src_pos,
1811 src,
1812 length,
1813 intrinsic_slow_path,
1814 temp1,
1815 optimizations.GetCountIsSourceLength());
1816
1817 // Validity checks: dest.
1818 CheckPosition(assembler,
1819 dest_pos,
1820 dest,
1821 length,
1822 intrinsic_slow_path,
1823 temp1,
1824 optimizations.GetCountIsDestinationLength());
1825
1826 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1827 // Check whether all elements of the source array are assignable to the component
1828 // type of the destination array. We do two checks: the classes are the same,
1829 // or the destination is Object[]. If none of these checks succeed, we go to the
1830 // slow path.
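  // A rough sketch of the check (illustrative only):
  //   if (src->klass_ != dest->klass_ &&
  //       !(dest component type's super class is null /* i.e. dest is Object[] */)) {
  //     goto slow_path;
  //   }
  // plus "is a non primitive array" checks where that is not statically known.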
1831
1832 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1833 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1834 // /* HeapReference<Class> */ temp1 = src->klass_
1835 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1836 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1837 // Bail out if the source is not a non primitive array.
1838 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1839 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1840 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1841 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1842 // If heap poisoning is enabled, `temp1` has been unpoisoned
1843 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1844 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1845 __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
1846 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1847 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1848 }
1849
1850 // /* HeapReference<Class> */ temp1 = dest->klass_
1851 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1852 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1853
1854 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1855 // Bail out if the destination is not a non primitive array.
1856 //
1857 // Register `temp1` is not trashed by the read barrier emitted
1858 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1859 // method produces a call to a ReadBarrierMarkRegX entry point,
1860 // which saves all potentially live registers, including
1861 // temporaries such as `temp1`.
1862 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1863 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1864 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
1865 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
1866 // If heap poisoning is enabled, `temp2` has been unpoisoned
1867 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1868 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1869 __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
1870 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1871 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
1872 }
1873
1874 // For the same reason given earlier, `temp1` is not trashed by the
1875 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1876 // /* HeapReference<Class> */ temp2 = src->klass_
1877 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1878 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1879 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1880 __ cmp(temp1, ShifterOperand(temp2));
1881
1882 if (optimizations.GetDestinationIsTypedObjectArray()) {
1883 Label do_copy;
1884 __ b(&do_copy, EQ);
1885 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1886 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1887 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1888 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1889 // We do not need to emit a read barrier for the following
1890 // heap reference load, as `temp1` is only used in a
1891 // comparison with null below, and this reference is not
1892 // kept afterwards.
1893 __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
1894 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1895 __ Bind(&do_copy);
1896 } else {
1897 __ b(intrinsic_slow_path->GetEntryLabel(), NE);
1898 }
1899 } else {
1900 // Non read barrier code.
1901
1902 // /* HeapReference<Class> */ temp1 = dest->klass_
1903 __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
1904 // /* HeapReference<Class> */ temp2 = src->klass_
1905 __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
1906 bool did_unpoison = false;
1907 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1908 !optimizations.GetSourceIsNonPrimitiveArray()) {
1909 // One or two of the references need to be unpoisoned. Unpoison them
1910 // both to make the identity check valid.
1911 __ MaybeUnpoisonHeapReference(temp1);
1912 __ MaybeUnpoisonHeapReference(temp2);
1913 did_unpoison = true;
1914 }
1915
1916 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1917 // Bail out if the destination is not a non primitive array.
1918 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1919 __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
1920 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1921 __ MaybeUnpoisonHeapReference(temp3);
1922 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1923 __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1924 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1925 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1926 }
1927
1928 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1929 // Bail out if the source is not a non primitive array.
1930 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1931 __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
1932 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1933 __ MaybeUnpoisonHeapReference(temp3);
1934 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1935 __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1936 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1937 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1938 }
1939
1940 __ cmp(temp1, ShifterOperand(temp2));
1941
1942 if (optimizations.GetDestinationIsTypedObjectArray()) {
1943 Label do_copy;
1944 __ b(&do_copy, EQ);
1945 if (!did_unpoison) {
1946 __ MaybeUnpoisonHeapReference(temp1);
1947 }
1948 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1949 __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
1950 __ MaybeUnpoisonHeapReference(temp1);
1951 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1952 __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
1953 // No need to unpoison the result, we're comparing against null.
1954 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1955 __ Bind(&do_copy);
1956 } else {
1957 __ b(intrinsic_slow_path->GetEntryLabel(), NE);
1958 }
1959 }
1960 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1961 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1962 // Bail out if the source is not a non primitive array.
1963 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1964 // /* HeapReference<Class> */ temp1 = src->klass_
1965 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1966 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1967 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1968 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1969 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1970 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1971 // If heap poisoning is enabled, `temp3` has been unpoisoned
1972 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1973 } else {
1974 // /* HeapReference<Class> */ temp1 = src->klass_
1975 __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
1976 __ MaybeUnpoisonHeapReference(temp1);
1977 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1978 __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
1979 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1980 __ MaybeUnpoisonHeapReference(temp3);
1981 }
1982 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1983 __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
1984 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1985 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1986 }
1987
1988 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1989 // Zero constant length: no need to emit the loop code at all.
1990 } else {
1991 Label done;
1992 const Primitive::Type type = Primitive::kPrimNot;
1993 const int32_t element_size = Primitive::ComponentSize(type);
1994
1995 if (length.IsRegister()) {
1996 // Don't enter the copy loop if the length is zero.
1997 __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done);
1998 }
1999
2000 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2001 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2002
2003 // SystemArrayCopy implementation for Baker read barriers (see
2004 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2005 //
2006 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2007 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2008 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2009 // if (is_gray) {
2010 // // Slow-path copy.
2011 // do {
2012 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2013 // } while (src_ptr != end_ptr)
2014 // } else {
2015 // // Fast-path copy.
2016 // do {
2017 // *dest_ptr++ = *src_ptr++;
2018 // } while (src_ptr != end_ptr)
2019 // }
2020
2021 // /* int32_t */ monitor = src->monitor_
2022 __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
2023 // /* LockWord */ lock_word = LockWord(monitor)
2024 static_assert(sizeof(LockWord) == sizeof(int32_t),
2025 "art::LockWord and int32_t have different sizes.");
2026
2027 // Introduce a dependency on the lock_word including the rb_state,
2028 // which shall prevent load-load reordering without using
2029 // a memory barrier (which would be more expensive).
2030 // `src` is unchanged by this operation, but its value now depends
2031 // on `temp2`.
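  // (An LSR by the immediate 32 always produces 0 on ARM, so this ADD adds zero and leaves
  // `src` unchanged while still making the instruction read `temp2`.)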
2032 __ add(src, src, ShifterOperand(temp2, LSR, 32));
2033
2034 // Compute the base source address in `temp1`.
2035 // Note that `temp1` (the base source address) is computed from
2036 // `src` (and `src_pos`) here, and thus honors the artificial
2037 // dependency of `src` on `temp2`.
2038 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2039 // Compute the end source address in `temp3`.
2040 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2041 // The base destination address is computed later, as `temp2` is
2042 // used for intermediate computations.
2043
2044 // Slow path used to copy array when `src` is gray.
2045 // Note that the base destination address is computed in `temp2`
2046 // by the slow path code.
2047 SlowPathCode* read_barrier_slow_path =
2048 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
2049 codegen_->AddSlowPath(read_barrier_slow_path);
2050
2051 // Given the numeric representation, it's enough to check the low bit of the
2052 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2053 // which can be a 16-bit instruction unlike the TST immediate.
2054 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2055 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2056 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2057 // Carry flag is the last bit shifted out by LSRS.
2058 __ b(read_barrier_slow_path->GetEntryLabel(), CS);
2059
2060 // Fast-path copy.
2061 // Compute the base destination address in `temp2`.
2062 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2063 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2064 // poison/unpoison.
2065 Label loop;
2066 __ Bind(&loop);
2067 __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
2068 __ str(IP, Address(temp2, element_size, Address::PostIndex));
2069 __ cmp(temp1, ShifterOperand(temp3));
2070 __ b(&loop, NE);
2071
2072 __ Bind(read_barrier_slow_path->GetExitLabel());
2073 } else {
2074 // Non read barrier code.
2075 // Compute the base source address in `temp1`.
2076 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2077 // Compute the base destination address in `temp2`.
2078 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2079 // Compute the end source address in `temp3`.
2080 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2081 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2082 // poison/unpoison.
2083 Label loop;
2084 __ Bind(&loop);
2085 __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
2086 __ str(IP, Address(temp2, element_size, Address::PostIndex));
2087 __ cmp(temp1, ShifterOperand(temp3));
2088 __ b(&loop, NE);
2089 }
2090 __ Bind(&done);
2091 }
2092
2093 // We only need one card marking on the destination array.
2094 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
2095
2096 __ Bind(intrinsic_slow_path->GetExitLabel());
2097 }
2098
CreateFPToFPCallLocations(ArenaAllocator * arena,HInvoke * invoke)2099 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2100 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2101 // the code generator. Furthermore, the register allocator creates fixed live intervals
2102 // for all caller-saved registers because we are doing a function call. As a result, if
2103 // the input and output locations are unallocated, the register allocator runs out of
2104 // registers and fails; however, a debuggable graph is not the common case.
2105 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2106 return;
2107 }
2108
2109 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2110 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2111 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2112
2113 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2114 LocationSummary::kCallOnMainOnly,
2115 kIntrinsified);
2116 const InvokeRuntimeCallingConvention calling_convention;
2117
2118 locations->SetInAt(0, Location::RequiresFpuRegister());
2119 locations->SetOut(Location::RequiresFpuRegister());
2120 // Native code uses the soft float ABI.
2121 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2122 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
2123 }
2124
CreateFPFPToFPCallLocations(ArenaAllocator * arena,HInvoke * invoke)2125 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2126 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2127 // the code generator. Furthermore, the register allocator creates fixed live intervals
2128 // for all caller-saved registers because we are doing a function call. As a result, if
2129 // the input and output locations are unallocated, the register allocator runs out of
2130 // registers and fails; however, a debuggable graph is not the common case.
2131 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2132 return;
2133 }
2134
2135 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2136 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2137 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2138 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2139
2140 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2141 LocationSummary::kCallOnMainOnly,
2142 kIntrinsified);
2143 const InvokeRuntimeCallingConvention calling_convention;
2144
2145 locations->SetInAt(0, Location::RequiresFpuRegister());
2146 locations->SetInAt(1, Location::RequiresFpuRegister());
2147 locations->SetOut(Location::RequiresFpuRegister());
2148 // Native code uses the soft float ABI.
2149 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2150 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
2151 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
2152 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
2153 }
2154
GenFPToFPCall(HInvoke * invoke,ArmAssembler * assembler,CodeGeneratorARM * codegen,QuickEntrypointEnum entry)2155 static void GenFPToFPCall(HInvoke* invoke,
2156 ArmAssembler* assembler,
2157 CodeGeneratorARM* codegen,
2158 QuickEntrypointEnum entry) {
2159 LocationSummary* const locations = invoke->GetLocations();
2160 const InvokeRuntimeCallingConvention calling_convention;
2161
2162 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2163 DCHECK(locations->WillCall() && locations->Intrinsified());
2164 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
2165 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
2166
2167 // Native code uses the soft float ABI.
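  // Illustrative note: under the soft float ABI the double argument travels in two core
  // registers, so vmovrrd splits the input D register before the call and vmovdrr
  // reassembles the result from the same register pair afterwards.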
2168 __ vmovrrd(calling_convention.GetRegisterAt(0),
2169 calling_convention.GetRegisterAt(1),
2170 FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
2171 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2172 __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
2173 calling_convention.GetRegisterAt(0),
2174 calling_convention.GetRegisterAt(1));
2175 }
2176
GenFPFPToFPCall(HInvoke * invoke,ArmAssembler * assembler,CodeGeneratorARM * codegen,QuickEntrypointEnum entry)2177 static void GenFPFPToFPCall(HInvoke* invoke,
2178 ArmAssembler* assembler,
2179 CodeGeneratorARM* codegen,
2180 QuickEntrypointEnum entry) {
2181 LocationSummary* const locations = invoke->GetLocations();
2182 const InvokeRuntimeCallingConvention calling_convention;
2183
2184 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2185 DCHECK(locations->WillCall() && locations->Intrinsified());
2186 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(0)));
2187 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(1)));
2188 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(2)));
2189 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(calling_convention.GetRegisterAt(3)));
2190
2191 // Native code uses the soft float ABI.
2192 __ vmovrrd(calling_convention.GetRegisterAt(0),
2193 calling_convention.GetRegisterAt(1),
2194 FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
2195 __ vmovrrd(calling_convention.GetRegisterAt(2),
2196 calling_convention.GetRegisterAt(3),
2197 FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>()));
2198 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2199 __ vmovdrr(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
2200 calling_convention.GetRegisterAt(0),
2201 calling_convention.GetRegisterAt(1));
2202 }
2203
VisitMathCos(HInvoke * invoke)2204 void IntrinsicLocationsBuilderARM::VisitMathCos(HInvoke* invoke) {
2205 CreateFPToFPCallLocations(arena_, invoke);
2206 }
2207
VisitMathCos(HInvoke * invoke)2208 void IntrinsicCodeGeneratorARM::VisitMathCos(HInvoke* invoke) {
2209 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2210 }
2211
VisitMathSin(HInvoke * invoke)2212 void IntrinsicLocationsBuilderARM::VisitMathSin(HInvoke* invoke) {
2213 CreateFPToFPCallLocations(arena_, invoke);
2214 }
2215
VisitMathSin(HInvoke * invoke)2216 void IntrinsicCodeGeneratorARM::VisitMathSin(HInvoke* invoke) {
2217 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2218 }
2219
VisitMathAcos(HInvoke * invoke)2220 void IntrinsicLocationsBuilderARM::VisitMathAcos(HInvoke* invoke) {
2221 CreateFPToFPCallLocations(arena_, invoke);
2222 }
2223
VisitMathAcos(HInvoke * invoke)2224 void IntrinsicCodeGeneratorARM::VisitMathAcos(HInvoke* invoke) {
2225 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2226 }
2227
VisitMathAsin(HInvoke * invoke)2228 void IntrinsicLocationsBuilderARM::VisitMathAsin(HInvoke* invoke) {
2229 CreateFPToFPCallLocations(arena_, invoke);
2230 }
2231
VisitMathAsin(HInvoke * invoke)2232 void IntrinsicCodeGeneratorARM::VisitMathAsin(HInvoke* invoke) {
2233 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2234 }
2235
VisitMathAtan(HInvoke * invoke)2236 void IntrinsicLocationsBuilderARM::VisitMathAtan(HInvoke* invoke) {
2237 CreateFPToFPCallLocations(arena_, invoke);
2238 }
2239
VisitMathAtan(HInvoke * invoke)2240 void IntrinsicCodeGeneratorARM::VisitMathAtan(HInvoke* invoke) {
2241 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2242 }
2243
VisitMathCbrt(HInvoke * invoke)2244 void IntrinsicLocationsBuilderARM::VisitMathCbrt(HInvoke* invoke) {
2245 CreateFPToFPCallLocations(arena_, invoke);
2246 }
2247
VisitMathCbrt(HInvoke * invoke)2248 void IntrinsicCodeGeneratorARM::VisitMathCbrt(HInvoke* invoke) {
2249 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2250 }
2251
VisitMathCosh(HInvoke * invoke)2252 void IntrinsicLocationsBuilderARM::VisitMathCosh(HInvoke* invoke) {
2253 CreateFPToFPCallLocations(arena_, invoke);
2254 }
2255
VisitMathCosh(HInvoke * invoke)2256 void IntrinsicCodeGeneratorARM::VisitMathCosh(HInvoke* invoke) {
2257 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2258 }
2259
VisitMathExp(HInvoke * invoke)2260 void IntrinsicLocationsBuilderARM::VisitMathExp(HInvoke* invoke) {
2261 CreateFPToFPCallLocations(arena_, invoke);
2262 }
2263
VisitMathExp(HInvoke * invoke)2264 void IntrinsicCodeGeneratorARM::VisitMathExp(HInvoke* invoke) {
2265 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2266 }
2267
VisitMathExpm1(HInvoke * invoke)2268 void IntrinsicLocationsBuilderARM::VisitMathExpm1(HInvoke* invoke) {
2269 CreateFPToFPCallLocations(arena_, invoke);
2270 }
2271
VisitMathExpm1(HInvoke * invoke)2272 void IntrinsicCodeGeneratorARM::VisitMathExpm1(HInvoke* invoke) {
2273 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2274 }
2275
VisitMathLog(HInvoke * invoke)2276 void IntrinsicLocationsBuilderARM::VisitMathLog(HInvoke* invoke) {
2277 CreateFPToFPCallLocations(arena_, invoke);
2278 }
2279
VisitMathLog(HInvoke * invoke)2280 void IntrinsicCodeGeneratorARM::VisitMathLog(HInvoke* invoke) {
2281 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2282 }
2283
VisitMathLog10(HInvoke * invoke)2284 void IntrinsicLocationsBuilderARM::VisitMathLog10(HInvoke* invoke) {
2285 CreateFPToFPCallLocations(arena_, invoke);
2286 }
2287
VisitMathLog10(HInvoke * invoke)2288 void IntrinsicCodeGeneratorARM::VisitMathLog10(HInvoke* invoke) {
2289 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2290 }
2291
VisitMathSinh(HInvoke * invoke)2292 void IntrinsicLocationsBuilderARM::VisitMathSinh(HInvoke* invoke) {
2293 CreateFPToFPCallLocations(arena_, invoke);
2294 }
2295
VisitMathSinh(HInvoke * invoke)2296 void IntrinsicCodeGeneratorARM::VisitMathSinh(HInvoke* invoke) {
2297 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2298 }
2299
VisitMathTan(HInvoke * invoke)2300 void IntrinsicLocationsBuilderARM::VisitMathTan(HInvoke* invoke) {
2301 CreateFPToFPCallLocations(arena_, invoke);
2302 }
2303
VisitMathTan(HInvoke * invoke)2304 void IntrinsicCodeGeneratorARM::VisitMathTan(HInvoke* invoke) {
2305 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2306 }
2307
VisitMathTanh(HInvoke * invoke)2308 void IntrinsicLocationsBuilderARM::VisitMathTanh(HInvoke* invoke) {
2309 CreateFPToFPCallLocations(arena_, invoke);
2310 }
2311
VisitMathTanh(HInvoke * invoke)2312 void IntrinsicCodeGeneratorARM::VisitMathTanh(HInvoke* invoke) {
2313 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2314 }
2315
VisitMathAtan2(HInvoke * invoke)2316 void IntrinsicLocationsBuilderARM::VisitMathAtan2(HInvoke* invoke) {
2317 CreateFPFPToFPCallLocations(arena_, invoke);
2318 }
2319
VisitMathAtan2(HInvoke * invoke)2320 void IntrinsicCodeGeneratorARM::VisitMathAtan2(HInvoke* invoke) {
2321 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2322 }
2323
VisitMathHypot(HInvoke * invoke)2324 void IntrinsicLocationsBuilderARM::VisitMathHypot(HInvoke* invoke) {
2325 CreateFPFPToFPCallLocations(arena_, invoke);
2326 }
2327
VisitMathHypot(HInvoke * invoke)2328 void IntrinsicCodeGeneratorARM::VisitMathHypot(HInvoke* invoke) {
2329 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2330 }
2331
VisitMathNextAfter(HInvoke * invoke)2332 void IntrinsicLocationsBuilderARM::VisitMathNextAfter(HInvoke* invoke) {
2333 CreateFPFPToFPCallLocations(arena_, invoke);
2334 }
2335
VisitMathNextAfter(HInvoke * invoke)2336 void IntrinsicCodeGeneratorARM::VisitMathNextAfter(HInvoke* invoke) {
2337 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2338 }
2339
VisitIntegerReverse(HInvoke * invoke)2340 void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) {
2341 CreateIntToIntLocations(arena_, invoke);
2342 }
2343
VisitIntegerReverse(HInvoke * invoke)2344 void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) {
2345 ArmAssembler* assembler = GetAssembler();
2346 LocationSummary* locations = invoke->GetLocations();
2347
2348 Register out = locations->Out().AsRegister<Register>();
2349 Register in = locations->InAt(0).AsRegister<Register>();
2350
2351 __ rbit(out, in);
2352 }
2353
VisitLongReverse(HInvoke * invoke)2354 void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) {
2355 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2356 LocationSummary::kNoCall,
2357 kIntrinsified);
2358 locations->SetInAt(0, Location::RequiresRegister());
2359 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2360 }
2361
VisitLongReverse(HInvoke * invoke)2362 void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) {
2363 ArmAssembler* assembler = GetAssembler();
2364 LocationSummary* locations = invoke->GetLocations();
2365
2366 Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
2367 Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
2368 Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
2369 Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
2370
2371 __ rbit(out_reg_lo, in_reg_hi);
2372 __ rbit(out_reg_hi, in_reg_lo);
2373 }
2374
VisitIntegerReverseBytes(HInvoke * invoke)2375 void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) {
2376 CreateIntToIntLocations(arena_, invoke);
2377 }
2378
VisitIntegerReverseBytes(HInvoke * invoke)2379 void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) {
2380 ArmAssembler* assembler = GetAssembler();
2381 LocationSummary* locations = invoke->GetLocations();
2382
2383 Register out = locations->Out().AsRegister<Register>();
2384 Register in = locations->InAt(0).AsRegister<Register>();
2385
2386 __ rev(out, in);
2387 }
2388
VisitLongReverseBytes(HInvoke * invoke)2389 void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) {
2390 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2391 LocationSummary::kNoCall,
2392 kIntrinsified);
2393 locations->SetInAt(0, Location::RequiresRegister());
2394 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2395 }
2396
VisitLongReverseBytes(HInvoke * invoke)2397 void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) {
2398 ArmAssembler* assembler = GetAssembler();
2399 LocationSummary* locations = invoke->GetLocations();
2400
2401 Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
2402 Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
2403 Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
2404 Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
2405
2406 __ rev(out_reg_lo, in_reg_hi);
2407 __ rev(out_reg_hi, in_reg_lo);
2408 }
2409
VisitShortReverseBytes(HInvoke * invoke)2410 void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) {
2411 CreateIntToIntLocations(arena_, invoke);
2412 }
2413
VisitShortReverseBytes(HInvoke * invoke)2414 void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
2415 ArmAssembler* assembler = GetAssembler();
2416 LocationSummary* locations = invoke->GetLocations();
2417
2418 Register out = locations->Out().AsRegister<Register>();
2419 Register in = locations->InAt(0).AsRegister<Register>();
2420
2421 __ revsh(out, in);
2422 }
2423
GenBitCount(HInvoke * instr,Primitive::Type type,ArmAssembler * assembler)2424 static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmAssembler* assembler) {
2425 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2426 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2427 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2428
2429 bool is_long = type == Primitive::kPrimLong;
2430 LocationSummary* locations = instr->GetLocations();
2431 Location in = locations->InAt(0);
2432 Register src_0 = is_long ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
2433 Register src_1 = is_long ? in.AsRegisterPairHigh<Register>() : src_0;
2434 SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
2435 DRegister tmp_d = FromLowSToD(tmp_s);
2436 Register out_r = locations->Out().AsRegister<Register>();
2437
2438 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2439 // According to the Cortex-A57 and Cortex-A72 optimization guides, compared to transferring to the full D-reg,
2440 // transferring data from a core reg to the upper or lower half of a VFP D-reg incurs extra latency.
2441 // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
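  // Worked example (illustrative, int case, where both halves of the D register hold the input):
  // for input 0x0000000f, vcnt.8 yields per-byte counts (4, 0, 0, 0) in each word, the first
  // vpaddl folds them into per-halfword sums (4, 0), the second into a per-word sum (4), and the
  // low S register read back below therefore holds the popcount, 4.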
2442 __ vmovdrr(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2443 __ vcntd(tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2444 __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true); // Temp DReg |--c|--c|--c|--c|
2445 __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true); // Temp DReg |------c|------c|
2446 if (is_long) {
2447 __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true); // Temp DReg |--------------c|
2448 }
  __ vmovrs(out_r, tmp_s);
}

void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
  VisitIntegerBitCount(invoke);
}

void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // Temporary registers to store lengths of strings and for calculations.
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  ArmAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Check the assumption that sizeof(Char) is 2 (used in the scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  // Location of data in the char array buffer.
  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  // Location of the char array data in the string.
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();

  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
  // Since getChars() calls getCharsNoCheck(), the arguments are not compile-time constants,
  // so we use registers rather than constants.
  Register srcObj = locations->InAt(0).AsRegister<Register>();
  Register srcBegin = locations->InAt(1).AsRegister<Register>();
  Register srcEnd = locations->InAt(2).AsRegister<Register>();
  Register dstObj = locations->InAt(3).AsRegister<Register>();
  Register dstBegin = locations->InAt(4).AsRegister<Register>();

  Register num_chr = locations->GetTemp(0).AsRegister<Register>();
  Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
  Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();

  Label done, compressed_string_loop;
  Label* final_label = codegen_->GetFinalLabel(invoke, &done);
  // Compute the start of the destination range: dst_ptr = dstObj + data_offset + dstBegin * 2.
  __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
  __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));

  __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
  // Early out for valid zero-length retrievals.
  __ b(final_label, EQ);

  // Compute the start of the source character data.
  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
  Label compressed_string_preloop;
  if (mirror::kUseStringCompression) {
    // Location of the count field in the string.
    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    // Load the count field, which holds the string's length and the compression flag.
    __ ldr(IP, Address(srcObj, count_offset));
    __ tst(IP, ShifterOperand(1));
    __ b(&compressed_string_preloop, EQ);
  }
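  // Uncompressed path: each source character is 2 bytes, so scale srcBegin by 2. (When string
  // compression is enabled, a clear bit 0 in the count field means the source is compressed,
  // and we branched to the byte-wide copy loop further down.)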
  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));

  // Do the copy.
  Label loop, remainder;

  // Subtract 4 into IP so that num_chr keeps its value on the < 4 character path.
  __ subs(IP, num_chr, ShifterOperand(4));
  __ b(&remainder, LT);

  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
  __ mov(num_chr, ShifterOperand(IP));

  // The main loop, used for longer fetches, loads and stores 4 x 16-bit characters at a time.
  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
  // to handle this everywhere this intrinsic applies.)
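  // Each iteration copies 8 bytes: a word load/store at offset 4 followed by a post-indexed
  // word load/store at offset 0 that advances both pointers by 8. The subs updating the
  // remaining count sits between the first load and store.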
  __ Bind(&loop);
  __ ldr(IP, Address(src_ptr, char_size * 2));
  __ subs(num_chr, num_chr, ShifterOperand(4));
  __ str(IP, Address(dst_ptr, char_size * 2));
  __ ldr(IP, Address(src_ptr, char_size * 4, Address::PostIndex));
  __ str(IP, Address(dst_ptr, char_size * 4, Address::PostIndex));
  __ b(&loop, GE);

  __ adds(num_chr, num_chr, ShifterOperand(4));
  __ b(final_label, EQ);

  // Loop for the < 4 character case and remainder handling: loads and stores one
  // 16-bit Java character at a time.
  __ Bind(&remainder);
  __ ldrh(IP, Address(src_ptr, char_size, Address::PostIndex));
  __ subs(num_chr, num_chr, ShifterOperand(1));
  __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
  __ b(&remainder, GT);

  if (mirror::kUseStringCompression) {
    __ b(final_label);

    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
    DCHECK_EQ(c_char_size, 1u);
    // Copy loop for a compressed src, widening one 8-bit character to 16 bits at a time.
    __ Bind(&compressed_string_preloop);
    __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
    __ Bind(&compressed_string_loop);
    __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
    __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
    __ subs(num_chr, num_chr, ShifterOperand(1));
    __ b(&compressed_string_loop, GT);
  }

  if (done.IsLinked()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
  ArmAssembler* const assembler = GetAssembler();
  LocationSummary* const locations = invoke->GetLocations();
  const Register out = locations->Out().AsRegister<Register>();
  // Shifting left by 1 bit makes the value encodable as an immediate operand;
  // we don't care about the sign bit anyway.
  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
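  // Concretely, kPositiveInfinityFloat is 0x7f800000; shifted left it becomes 0xff000000,
  // which fits ARM's rotated 8-bit immediate encoding, whereas 0x7f800000 does not.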

  __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
  // We don't care about the sign bit, so shift left.
  __ Lsl(out, out, 1);
  __ eor(out, out, ShifterOperand(infinity));
  // If the result is 0, then it has 32 leading zeros, and fewer than that otherwise.
  __ clz(out, out);
  // Any number less than 32 logically shifted right by 5 bits results in 0;
  // the same operation on 32 yields 1.
  __ Lsr(out, out, 5);
}

void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
  ArmAssembler* const assembler = GetAssembler();
  LocationSummary* const locations = invoke->GetLocations();
  const Register out = locations->Out().AsRegister<Register>();
  // The highest 32 bits of double precision positive infinity, separated into
  // two constants encodable as immediate operands.
  constexpr uint32_t infinity_high = 0x7f000000U;
  constexpr uint32_t infinity_high2 = 0x00f00000U;

  static_assert((infinity_high | infinity_high2) ==
                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
                "The constants do not add up to the high 32 bits of double precision "
                "positive infinity.");
  __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
  __ eor(out, out, ShifterOperand(infinity_high));
  __ eor(out, out, ShifterOperand(infinity_high2));
  // We don't care about the sign bit, so shift left.
  __ orr(out, IP, ShifterOperand(out, LSL, 1));
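  // out is now zero iff the exponent and mantissa bits match infinity (ignoring the sign bit)
  // and the low 32 bits of the double are zero, i.e. iff the input is +/- infinity.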
  // If the result is 0, then it has 32 leading zeros, and fewer than that otherwise.
  __ clz(out, out);
  // Any number less than 32 logically shifted right by 5 bits results in 0;
  // the same operation on 32 yields 1.
  __ Lsr(out, out, 5);
}

void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  ArmAssembler* const assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load the ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0)).AsRegister<Register>();

  // Now get the declaring class.
  __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check the static flags that prevent using the intrinsic.
  __ ldr(IP, Address(temp, disable_flag_offset));
  __ ldr(temp, Address(temp, slow_path_flag_offset));
  __ orr(IP, IP, ShifterOperand(temp));
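  // Both flag words are OR-ed together so a single compare-and-branch covers either condition.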
  __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());

  // Fast path.
  __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(R0),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  ArmAssembler* const assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  Register argument = calling_convention.GetRegisterAt(0);
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address =
          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ LoadImmediate(IP, value);
      __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the
      // allocation one.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check bounds of our cache.
    __ AddConstant(out, in, -info.low);
    __ CmpConstant(out, info.high - info.low + 1);
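    // out now holds (value - info.low); a single unsigned "higher or same" comparison against
    // the cache length rejects values below info.low and above info.high in one branch.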
    Label allocate, done;
    __ b(&allocate, HS);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
    __ MaybeUnpoisonHeapReference(out);
    __ b(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the
    // allocation one.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinLongLong)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxLongLong)
UNIMPLEMENTED_INTRINSIC(ARM, MathCeil)          // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARM, MathFloor)         // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARM, MathRint)
UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARM, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARM, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARM)

#undef __

}  // namespace arm
}  // namespace art