/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

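// Builds a MemOperand that addresses absolute memory through the X register held in `location`,
// plus an optional byte offset. Used by the Memory peek/poke intrinsics below.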
ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

#define __ codegen->GetVIXLAssembler()->

static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorARM64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  DCHECK_NE(type, Primitive::kPrimVoid);

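  // Integral and reference results come back in the core return register, floating-point
  // results in the FP return register; copy the result into the target location.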
  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
    Register trg_reg = RegisterFrom(trg, type);
    Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
    __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
  } else {
    FPRegister trg_reg = FPRegisterFrom(trg, type);
    FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
    __ Fmov(trg_reg, res_reg);
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke), invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    {
      // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
      // are no pools emitted.
      vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
      if (invoke_->IsInvokeStaticOrDirect()) {
        codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                            LocationFrom(kArtMethodRegister));
      } else {
        codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister));
      }
      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

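// Returns whether the invoke was recognized as an intrinsic and an intrinsified location
// summary was built for it.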
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  switch (type) {
    case Primitive::kPrimShort:
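      // Rev16 swaps the bytes within each halfword; sign-extend the low halfword to produce
      // the short result.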
      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
      break;
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler());
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

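  // CTZ(x) == CLZ(bit_reverse(x)): reverse the bits, then count the leading zeros.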
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       Primitive::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) {
  DCHECK(Primitive::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();

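  // There is no scalar popcount instruction: move the value to a SIMD register, count the set
  // bits in each byte with CNT, then sum the byte counts with ADDV.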
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
  FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);

  __ Fabs(out_reg, in_reg);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
  Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);

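  // abs(x): compare against zero and conditionally negate when the input is negative.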
  __ Cmp(in_reg, Operand(0));
  __ Cneg(out_reg, in_reg, lt);
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToInt(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToInt(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
  FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
  FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
  if (is_min) {
    __ Fmin(out_reg, op1_reg, op2_reg);
  } else {
    __ Fmax(out_reg, op1_reg, op2_reg);
  }
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
}

static void GenMinMax(LocationSummary* locations,
                      bool is_min,
                      bool is_long,
                      MacroAssembler* masm) {
  Location op1 = locations->InAt(0);
  Location op2 = locations->InAt(1);
  Location out = locations->Out();

  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
  Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);

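  // Compare the operands and select op1 when the min/max condition holds, op2 otherwise.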
  __ Cmp(op1_reg, op2_reg);
  __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
}

void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little fix-up code after FCVTAS if the input is a negative tie
  // (i.e. exactly halfway between two integers).
  //
  // The reason why we didn't choose the FCVTPS instruction here is that, although it rounds
  // toward positive infinity, it doesn't round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using that instruction, more fix-up code would be needed for most inputs.
  LocationSummary* l = invoke->GetLocations();
  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
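  // The current Thread is held in the dedicated thread register (`tr`); load its Java peer
  // (the java.lang.Thread object) from there.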
  codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == Primitive::kPrimInt) ||
         (type == Primitive::kPrimLong) ||
         (type == Primitive::kPrimNot));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                       trg_loc,
                                                       base,
                                                       /* offset */ 0u,
                                                       /* index */ offset_loc,
                                                       /* scale_factor */ 0u,
                                                       temp,
                                                       /* needs_null_check */ false,
                                                       is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == Primitive::kPrimNot) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
      ? Location::kOutputOverlap
      : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Temporary register for (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  Register out = WRegisterFrom(out_loc);                       // Boolean result.

  Register base = WRegisterFrom(locations->InAt(1));           // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);                 // Long offset.
  Register expected = RegisterFrom(locations->InAt(3), type);  // Expected.
  Register value = RegisterFrom(locations->InAt(4), type);     // Value.

  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
  if (type == Primitive::kPrimNot) {
    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);

    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      Register temp = WRegisterFrom(locations->GetTemp(0));
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          /* scale_factor */ 0u,
          temp,
          /* needs_null_check */ false,
          /* use_load_acquire */ false,
          /* always_update_field */ true);
    }
  }

  UseScratchRegisterScope temps(masm);
  Register tmp_ptr = temps.AcquireX();                  // Pointer to actual memory.
  Register tmp_value = temps.AcquireSameSizeAs(value);  // Value in memory.

  Register tmp_32 = tmp_value.W();

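  // Compute the absolute address being updated: object pointer plus byte offset.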
  __ Add(tmp_ptr, base.X(), Operand(offset));

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp_value = [tmp_ptr] - expected;
  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
  // result = tmp_value != 0;

  vixl::aarch64::Label loop_head, exit_loop;
  __ Bind(&loop_head);
  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
  __ Cmp(tmp_value, expected);
  __ B(&exit_loop, ne);
  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
  __ Cbnz(tmp_32, &loop_head);
  __ Bind(&exit_loop);
  __ Cset(out, eq);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimLong, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
1256 // Need an extra temporary register for the String compression feature.
1257 if (mirror::kUseStringCompression) {
1258 locations->AddTemp(Location::RequiresRegister());
1259 }
1260 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1261 }
1262
1263 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1264 MacroAssembler* masm = GetVIXLAssembler();
1265 LocationSummary* locations = invoke->GetLocations();
1266
1267 Register str = InputRegisterAt(invoke, 0);
1268 Register arg = InputRegisterAt(invoke, 1);
1269 DCHECK(str.IsW());
1270 DCHECK(arg.IsW());
1271 Register out = OutputRegister(invoke);
1272
1273 Register temp0 = WRegisterFrom(locations->GetTemp(0));
1274 Register temp1 = WRegisterFrom(locations->GetTemp(1));
1275 Register temp2 = WRegisterFrom(locations->GetTemp(2));
1276 Register temp3;
1277 if (mirror::kUseStringCompression) {
1278 temp3 = WRegisterFrom(locations->GetTemp(3));
1279 }
1280
1281 vixl::aarch64::Label loop;
1282 vixl::aarch64::Label find_char_diff;
1283 vixl::aarch64::Label end;
1284 vixl::aarch64::Label different_compression;
1285
1286 // Get offsets of count and value fields within a string object.
1287 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1288 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1289
1290 // Note that the null check must have been done earlier.
1291 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1292
1293 // Take the slow path and throw if the input can be null and actually is null.
1294 SlowPathCodeARM64* slow_path = nullptr;
1295 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1296 if (can_slow_path) {
1297 slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1298 codegen_->AddSlowPath(slow_path);
1299 __ Cbz(arg, slow_path->GetEntryLabel());
1300 }
1301
1302 // Reference equality check, return 0 if same reference.
1303 __ Subs(out, str, arg);
1304 __ B(&end, eq);
1305
1306 if (mirror::kUseStringCompression) {
1307 // Load `count` fields of this and argument strings.
1308 __ Ldr(temp3, HeapOperand(str, count_offset));
1309 __ Ldr(temp2, HeapOperand(arg, count_offset));
1310 // Clean out compression flag from lengths.
1311 __ Lsr(temp0, temp3, 1u);
1312 __ Lsr(temp1, temp2, 1u);
1313 } else {
1314 // Load lengths of this and argument strings.
1315 __ Ldr(temp0, HeapOperand(str, count_offset));
1316 __ Ldr(temp1, HeapOperand(arg, count_offset));
1317 }
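// With string compression enabled, the `count` field encodes (length << 1) | flag, where the
// flag bit is 0 for a compressed (8-bit) string and 1 for an uncompressed (16-bit) one, which
// is why the lengths above are recovered with a logical shift right by one.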
1318 // out = length diff.
1319 __ Subs(out, temp0, temp1);
1320 // temp0 = min(len(str), len(arg)).
1321 __ Csel(temp0, temp1, temp0, ge);
1322 // Shorter string is empty?
1323 __ Cbz(temp0, &end);
1324
1325 if (mirror::kUseStringCompression) {
1326 // Check whether both strings use the same compression style; only then can this comparison loop be used.
1327 __ Eor(temp2, temp2, Operand(temp3));
1328 // Interleave with the compression flag extraction, which is needed on both paths, and
1329 // also set the flags, which are needed only on the different-compression path.
1330 __ Ands(temp3.W(), temp3.W(), Operand(1));
1331 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
1332 }
1333 // Store offset of string value in preparation for comparison loop.
1334 __ Mov(temp1, value_offset);
1335 if (mirror::kUseStringCompression) {
1336 // For string compression, calculate the number of bytes to compare (not chars).
1337 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1338 __ Lsl(temp0, temp0, temp3);
1339 }
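// Illustrative example: for a minimum length of 5 characters, the number of bytes to compare
// stays 5 for two compressed strings (temp3 == 0, no shift) and becomes 10 for two uncompressed
// strings (temp3 == 1, so the Lsl above doubles the count).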
1340
1341 UseScratchRegisterScope scratch_scope(masm);
1342 Register temp4 = scratch_scope.AcquireX();
1343
1344 // Assertions that must hold in order to compare strings 8 bytes at a time.
1345 DCHECK_ALIGNED(value_offset, 8);
1346 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1347
1348 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1349 DCHECK_EQ(char_size, 2u);
1350
1351 // Promote temp2 to an X reg, ready for LDR.
1352 temp2 = temp2.X();
1353
1354 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1355 __ Bind(&loop);
1356 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1357 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1358 __ Cmp(temp4, temp2);
1359 __ B(ne, &find_char_diff);
1360 __ Add(temp1, temp1, char_size * 4);
1361 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1362 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1363 __ B(&loop, hi);
1364 __ B(&end);
1365
1366 // Promote temp1 to an X reg, ready for EOR.
1367 temp1 = temp1.X();
1368
1369 // Find the single character difference.
1370 __ Bind(&find_char_diff);
1371 // Get the bit position of the first character that differs.
1372 __ Eor(temp1, temp2, temp4);
1373 __ Rbit(temp1, temp1);
1374 __ Clz(temp1, temp1);
1375
1376 // If the number of chars remaining <= the index where the difference occurs (0-3), then
1377 // the difference occurs outside the remaining string data, so just return length diff (out).
1378 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1379 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1380 // unsigned when string compression is disabled.
1381 // When it's enabled, the comparison must be unsigned.
1382 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1383 __ B(ls, &end);
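// Worked example (assuming string compression is disabled, so the shift above is #4): if the
// first mismatch is in the third 16-bit character of the 64-bit chunk, the lowest set bit of
// the Eor result lies in bits [32, 48), Rbit+Clz therefore yields a value in that range, and
// shifting it right by 4 gives character index 2, which is compared against the characters left.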
1384
1385 // Extract the characters and calculate the difference.
1386 if (mirror::kUseStringCompression) {
1387 __ Bic(temp1, temp1, 0x7);
1388 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1389 } else {
1390 __ Bic(temp1, temp1, 0xf);
1391 }
1392 __ Lsr(temp2, temp2, temp1);
1393 __ Lsr(temp4, temp4, temp1);
1394 if (mirror::kUseStringCompression) {
1395 // Prioritize the case of compressed strings and calculate such result first.
1396 __ Uxtb(temp1, temp4);
1397 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1398 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
1399 }
1400 __ Uxth(temp4, temp4);
1401 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1402
1403 if (mirror::kUseStringCompression) {
1404 __ B(&end);
1405 __ Bind(&different_compression);
1406
1407 // Comparison for different compression style.
1408 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1409 DCHECK_EQ(c_char_size, 1u);
1410 temp1 = temp1.W();
1411 temp2 = temp2.W();
1412 temp4 = temp4.W();
1413
1414 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1415 // Note that flags have been set by the `str` compression flag extraction to `temp3`
1416 // before branching to the `different_compression` label.
1417 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
1418 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
1419
1420 // We want to free up temp3, which currently holds the `str` compression flag, for comparison.
1421 // So, we move the flag to the bottom bit of the iteration count `temp0`, which we then need to
1422 // treat as unsigned. Start by freeing the bit with an LSL and complete the move further down
1423 // with a SUB, which lets `subs temp0, #2; bhi different_compression_loop` serve as the loop condition.
1424 __ Lsl(temp0, temp0, 1u);
1425
1426 // Adjust temp1 and temp2 from string pointers to data pointers.
1427 __ Add(temp1, temp1, Operand(value_offset));
1428 __ Add(temp2, temp2, Operand(value_offset));
1429
1430 // Complete the move of the compression flag.
1431 __ Sub(temp0, temp0, Operand(temp3));
1432
1433 vixl::aarch64::Label different_compression_loop;
1434 vixl::aarch64::Label different_compression_diff;
1435
1436 __ Bind(&different_compression_loop);
1437 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1438 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1439 __ Subs(temp4, temp4, Operand(temp3));
1440 __ B(&different_compression_diff, ne);
1441 __ Subs(temp0, temp0, 2);
1442 __ B(&different_compression_loop, hi);
1443 __ B(&end);
1444
1445 // Calculate the difference.
1446 __ Bind(&different_compression_diff);
1447 __ Tst(temp0, Operand(1));
1448 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1449 "Expecting 0=compressed, 1=uncompressed");
1450 __ Cneg(out, temp4, ne);
1451 }
1452
1453 __ Bind(&end);
1454
1455 if (can_slow_path) {
1456 __ Bind(slow_path->GetExitLabel());
1457 }
1458 }
1459
1460 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
1461 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1462 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1463 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1464 // to 10 instructions for the unrolled loop.
1465 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
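// For instance, a compressed const string of 25 to 31 characters is rounded up to 32 bytes and
// compared with two LDP+LDP+CMP+CCMP+BNE groups, i.e. 10 instructions, matching the allowance
// described above.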
1466
1467 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1468 if (candidate->IsLoadString()) {
1469 HLoadString* load_string = candidate->AsLoadString();
1470 const DexFile& dex_file = load_string->GetDexFile();
1471 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1472 }
1473 return nullptr;
1474 }
1475
1476 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1477 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1478 LocationSummary::kNoCall,
1479 kIntrinsified);
1480 locations->SetInAt(0, Location::RequiresRegister());
1481 locations->SetInAt(1, Location::RequiresRegister());
1482
1483 // For the generic implementation and for long const strings we need a temporary.
1484 // We do not need it for short const strings, up to 8 bytes, see code generation below.
1485 uint32_t const_string_length = 0u;
1486 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1487 if (const_string == nullptr) {
1488 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1489 }
1490 bool is_compressed =
1491 mirror::kUseStringCompression &&
1492 const_string != nullptr &&
1493 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1494 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1495 locations->AddTemp(Location::RequiresRegister());
1496 }
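// In other words, a const string that fits in a single 8-byte load (up to 8 compressed or 4
// uncompressed characters) can be compared using scratch registers alone; anything longer needs
// the extra temporary for the LDP-based comparison emitted below.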
1497
1498 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1499 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1500 // Then we shall need an extra temporary register instead of the output register.
1501 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1502 }
1503
1504 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1505 MacroAssembler* masm = GetVIXLAssembler();
1506 LocationSummary* locations = invoke->GetLocations();
1507
1508 Register str = WRegisterFrom(locations->InAt(0));
1509 Register arg = WRegisterFrom(locations->InAt(1));
1510 Register out = XRegisterFrom(locations->Out());
1511
1512 UseScratchRegisterScope scratch_scope(masm);
1513 Register temp = scratch_scope.AcquireW();
1514 Register temp1 = scratch_scope.AcquireW();
1515
1516 vixl::aarch64::Label loop;
1517 vixl::aarch64::Label end;
1518 vixl::aarch64::Label return_true;
1519 vixl::aarch64::Label return_false;
1520
1521 // Get offsets of count, value, and class fields within a string object.
1522 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1523 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1524 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1525
1526 // Note that the null check must have been done earlier.
1527 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1528
1529 StringEqualsOptimizations optimizations(invoke);
1530 if (!optimizations.GetArgumentNotNull()) {
1531 // Check if input is null, return false if it is.
1532 __ Cbz(arg, &return_false);
1533 }
1534
1535 // Reference equality check, return true if same reference.
1536 __ Cmp(str, arg);
1537 __ B(&return_true, eq);
1538
1539 if (!optimizations.GetArgumentIsString()) {
1540 // Instanceof check for the argument by comparing class fields.
1541 // All string objects must have the same type since String cannot be subclassed.
1542 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1543 // If the argument is a string object, its class field must be equal to receiver's class field.
1544 __ Ldr(temp, MemOperand(str.X(), class_offset));
1545 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1546 __ Cmp(temp, temp1);
1547 __ B(&return_false, ne);
1548 }
1549
1550 // Check if one of the inputs is a const string. Do not special-case both strings
1551 // being const; such cases should be handled by constant folding if needed.
1552 uint32_t const_string_length = 0u;
1553 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1554 if (const_string == nullptr) {
1555 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1556 if (const_string != nullptr) {
1557 std::swap(str, arg); // Make sure the const string is in `str`.
1558 }
1559 }
1560 bool is_compressed =
1561 mirror::kUseStringCompression &&
1562 const_string != nullptr &&
1563 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1564
1565 if (const_string != nullptr) {
1566 // Load `count` field of the argument string and check if it matches the const string.
1567 // This also compares the compression style; if it differs, return false.
1568 __ Ldr(temp, MemOperand(arg.X(), count_offset));
1569 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1570 scratch_scope.Release(temp1);
1571 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1572 temp1 = scratch_scope.AcquireW();
1573 __ B(&return_false, ne);
1574 } else {
1575 // Load `count` fields of this and argument strings.
1576 __ Ldr(temp, MemOperand(str.X(), count_offset));
1577 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1578 // Check if `count` fields are equal, return false if they're not.
1579 // This also compares the compression style; if it differs, return false.
1580 __ Cmp(temp, temp1);
1581 __ B(&return_false, ne);
1582 }
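// In the const-string branch above, mirror::String::GetFlaggedCount() produces the value that
// the `count` field would hold for that length and compression style, so a single CMP checks
// both the length and the compression style of the argument.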
1583
1584 // Assertions that must hold in order to compare strings 8 bytes at a time.
1585 DCHECK_ALIGNED(value_offset, 8);
1586 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1587
1588 if (const_string != nullptr &&
1589 const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes
1590 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1591 // Load and compare the contents. Though we know the contents of the short const string
1592 // at compile time, materializing constants may be more code than loading from memory.
1593 int32_t offset = value_offset;
1594 size_t remaining_bytes =
1595 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1596 temp = temp.X();
1597 temp1 = temp1.X();
1598 while (remaining_bytes > 8u) {
1599 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1600 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1601 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1602 __ Cmp(temp, temp2);
1603 __ Ccmp(temp1, out, NoFlag, eq);
1604 __ B(&return_false, ne);
1605 offset += 2u * sizeof(uint64_t);
1606 remaining_bytes -= 2u * sizeof(uint64_t);
1607 }
1608 if (remaining_bytes != 0u) {
1609 __ Ldr(temp, MemOperand(str.X(), offset));
1610 __ Ldr(temp1, MemOperand(arg.X(), offset));
1611 __ Cmp(temp, temp1);
1612 __ B(&return_false, ne);
1613 }
1614 } else {
1615 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1616 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1617 "Expecting 0=compressed, 1=uncompressed");
1618 __ Cbz(temp, &return_true);
1619
1620 if (mirror::kUseStringCompression) {
1621 // For string compression, calculate the number of bytes to compare (not chars).
1622 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1623 __ And(temp1, temp, Operand(1)); // Extract compression flag.
1624 __ Lsr(temp, temp, 1u); // Extract length.
1625 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
1626 }
1627
1628 // Store offset of string value in preparation for comparison loop
1629 __ Mov(temp1, value_offset);
1630
1631 temp1 = temp1.X();
1632 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1633 // Loop to compare strings 8 bytes at a time starting at the front of the string.
1634 // Ok to do this because strings are zero-padded to kObjectAlignment.
1635 __ Bind(&loop);
1636 __ Ldr(out, MemOperand(str.X(), temp1));
1637 __ Ldr(temp2, MemOperand(arg.X(), temp1));
1638 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1639 __ Cmp(out, temp2);
1640 __ B(&return_false, ne);
1641 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1642 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1643 __ B(&loop, hi);
1644 }
1645
1646 // Return true and exit the function.
1647 // If loop does not result in returning false, we return true.
1648 __ Bind(&return_true);
1649 __ Mov(out, 1);
1650 __ B(&end);
1651
1652 // Return false and exit the function.
1653 __ Bind(&return_false);
1654 __ Mov(out, 0);
1655 __ Bind(&end);
1656 }
1657
1658 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1659 MacroAssembler* masm,
1660 CodeGeneratorARM64* codegen,
1661 ArenaAllocator* allocator,
1662 bool start_at_zero) {
1663 LocationSummary* locations = invoke->GetLocations();
1664
1665 // Note that the null check must have been done earlier.
1666 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1667
1668 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1669 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1670 SlowPathCodeARM64* slow_path = nullptr;
1671 HInstruction* code_point = invoke->InputAt(1);
1672 if (code_point->IsIntConstant()) {
1673 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1674 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1675 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1676 slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
1677 codegen->AddSlowPath(slow_path);
1678 __ B(slow_path->GetEntryLabel());
1679 __ Bind(slow_path->GetExitLabel());
1680 return;
1681 }
1682 } else if (code_point->GetType() != Primitive::kPrimChar) {
1683 Register char_reg = WRegisterFrom(locations->InAt(1));
1684 __ Tst(char_reg, 0xFFFF0000);
1685 slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
1686 codegen->AddSlowPath(slow_path);
1687 __ B(ne, slow_path->GetEntryLabel());
1688 }
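// Code points above 0xFFFF would have to be matched as a surrogate pair, which the fast
// assembly stub does not attempt, so such values are diverted to the slow path set up above.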
1689
1690 if (start_at_zero) {
1691 // Start-index = 0.
1692 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1693 __ Mov(tmp_reg, 0);
1694 }
1695
1696 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1697 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1698
1699 if (slow_path != nullptr) {
1700 __ Bind(slow_path->GetExitLabel());
1701 }
1702 }
1703
1704 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1705 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1706 LocationSummary::kCallOnMainAndSlowPath,
1707 kIntrinsified);
1708 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1709 // best to align the inputs accordingly.
1710 InvokeRuntimeCallingConvention calling_convention;
1711 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1712 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1713 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
1714
1715 // Need to send start_index=0.
1716 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1717 }
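// The temporary is wired to the third runtime argument register so that the stub is always
// called as (string, ch, fromIndex), with fromIndex forced to zero by the code generator below.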
1718
1719 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1720 GenerateVisitStringIndexOf(
1721 invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1722 }
1723
1724 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1725 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1726 LocationSummary::kCallOnMainAndSlowPath,
1727 kIntrinsified);
1728 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1729 // best to align the inputs accordingly.
1730 InvokeRuntimeCallingConvention calling_convention;
1731 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1732 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1733 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1734 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
1735 }
1736
1737 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1738 GenerateVisitStringIndexOf(
1739 invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1740 }
1741
1742 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1743 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1744 LocationSummary::kCallOnMainAndSlowPath,
1745 kIntrinsified);
1746 InvokeRuntimeCallingConvention calling_convention;
1747 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1748 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1749 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1750 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1751 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1752 }
1753
1754 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1755 MacroAssembler* masm = GetVIXLAssembler();
1756 LocationSummary* locations = invoke->GetLocations();
1757
1758 Register byte_array = WRegisterFrom(locations->InAt(0));
1759 __ Cmp(byte_array, 0);
1760 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1761 codegen_->AddSlowPath(slow_path);
1762 __ B(eq, slow_path->GetEntryLabel());
1763
1764 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1765 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1766 __ Bind(slow_path->GetExitLabel());
1767 }
1768
1769 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1770 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1771 LocationSummary::kCallOnMainOnly,
1772 kIntrinsified);
1773 InvokeRuntimeCallingConvention calling_convention;
1774 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1775 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1776 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1777 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1778 }
1779
1780 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1781 // No need to emit code checking whether `locations->InAt(2)` is a null
1782 // pointer, as callers of the native method
1783 //
1784 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1785 //
1786 // all include a null check on `data` before calling that method.
1787 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1788 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1789 }
1790
1791 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1792 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1793 LocationSummary::kCallOnMainAndSlowPath,
1794 kIntrinsified);
1795 InvokeRuntimeCallingConvention calling_convention;
1796 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1797 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
1798 }
1799
1800 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1801 MacroAssembler* masm = GetVIXLAssembler();
1802 LocationSummary* locations = invoke->GetLocations();
1803
1804 Register string_to_copy = WRegisterFrom(locations->InAt(0));
1805 __ Cmp(string_to_copy, 0);
1806 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
1807 codegen_->AddSlowPath(slow_path);
1808 __ B(eq, slow_path->GetEntryLabel());
1809
1810 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1811 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1812 __ Bind(slow_path->GetExitLabel());
1813 }
1814
1815 static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
1816 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1817 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1818 DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
1819
1820 LocationSummary* const locations = new (arena) LocationSummary(invoke,
1821 LocationSummary::kCallOnMainOnly,
1822 kIntrinsified);
1823 InvokeRuntimeCallingConvention calling_convention;
1824
1825 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1826 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1827 }
1828
1829 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
1830 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1831 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1832 DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1833 DCHECK(Primitive::IsFloatingPointType(invoke->GetType()));
1834
1835 LocationSummary* const locations = new (arena) LocationSummary(invoke,
1836 LocationSummary::kCallOnMainOnly,
1837 kIntrinsified);
1838 InvokeRuntimeCallingConvention calling_convention;
1839
1840 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1841 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1842 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1843 }
1844
1845 static void GenFPToFPCall(HInvoke* invoke,
1846 CodeGeneratorARM64* codegen,
1847 QuickEntrypointEnum entry) {
1848 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1849 }
1850
1851 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1852 CreateFPToFPCallLocations(arena_, invoke);
1853 }
1854
1855 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1856 GenFPToFPCall(invoke, codegen_, kQuickCos);
1857 }
1858
1859 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1860 CreateFPToFPCallLocations(arena_, invoke);
1861 }
1862
1863 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1864 GenFPToFPCall(invoke, codegen_, kQuickSin);
1865 }
1866
1867 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1868 CreateFPToFPCallLocations(arena_, invoke);
1869 }
1870
1871 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1872 GenFPToFPCall(invoke, codegen_, kQuickAcos);
1873 }
1874
1875 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1876 CreateFPToFPCallLocations(arena_, invoke);
1877 }
1878
1879 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1880 GenFPToFPCall(invoke, codegen_, kQuickAsin);
1881 }
1882
1883 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1884 CreateFPToFPCallLocations(arena_, invoke);
1885 }
1886
1887 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1888 GenFPToFPCall(invoke, codegen_, kQuickAtan);
1889 }
1890
1891 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1892 CreateFPToFPCallLocations(arena_, invoke);
1893 }
1894
1895 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1896 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1897 }
1898
1899 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1900 CreateFPToFPCallLocations(arena_, invoke);
1901 }
1902
1903 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1904 GenFPToFPCall(invoke, codegen_, kQuickCosh);
1905 }
1906
1907 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1908 CreateFPToFPCallLocations(arena_, invoke);
1909 }
1910
1911 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1912 GenFPToFPCall(invoke, codegen_, kQuickExp);
1913 }
1914
1915 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1916 CreateFPToFPCallLocations(arena_, invoke);
1917 }
1918
1919 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1920 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1921 }
1922
1923 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1924 CreateFPToFPCallLocations(arena_, invoke);
1925 }
1926
1927 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1928 GenFPToFPCall(invoke, codegen_, kQuickLog);
1929 }
1930
1931 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1932 CreateFPToFPCallLocations(arena_, invoke);
1933 }
1934
1935 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1936 GenFPToFPCall(invoke, codegen_, kQuickLog10);
1937 }
1938
1939 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1940 CreateFPToFPCallLocations(arena_, invoke);
1941 }
1942
1943 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1944 GenFPToFPCall(invoke, codegen_, kQuickSinh);
1945 }
1946
1947 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1948 CreateFPToFPCallLocations(arena_, invoke);
1949 }
1950
1951 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1952 GenFPToFPCall(invoke, codegen_, kQuickTan);
1953 }
1954
1955 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1956 CreateFPToFPCallLocations(arena_, invoke);
1957 }
1958
1959 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
1960 GenFPToFPCall(invoke, codegen_, kQuickTanh);
1961 }
1962
1963 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
1964 CreateFPFPToFPCallLocations(arena_, invoke);
1965 }
1966
1967 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
1968 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1969 }
1970
1971 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
1972 CreateFPFPToFPCallLocations(arena_, invoke);
1973 }
1974
1975 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
1976 GenFPToFPCall(invoke, codegen_, kQuickHypot);
1977 }
1978
1979 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
1980 CreateFPFPToFPCallLocations(arena_, invoke);
1981 }
1982
1983 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
1984 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1985 }
1986
1987 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1988 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1989 LocationSummary::kNoCall,
1990 kIntrinsified);
1991 locations->SetInAt(0, Location::RequiresRegister());
1992 locations->SetInAt(1, Location::RequiresRegister());
1993 locations->SetInAt(2, Location::RequiresRegister());
1994 locations->SetInAt(3, Location::RequiresRegister());
1995 locations->SetInAt(4, Location::RequiresRegister());
1996
1997 locations->AddTemp(Location::RequiresRegister());
1998 locations->AddTemp(Location::RequiresRegister());
1999 locations->AddTemp(Location::RequiresRegister());
2000 }
2001
2002 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2003 MacroAssembler* masm = GetVIXLAssembler();
2004 LocationSummary* locations = invoke->GetLocations();
2005
2006 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2007 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2008 DCHECK_EQ(char_size, 2u);
2009
2010 // Location of data in char array buffer.
2011 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2012
2013 // Location of char array data in string.
2014 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2015
2016 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2017 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2018 Register srcObj = XRegisterFrom(locations->InAt(0));
2019 Register srcBegin = XRegisterFrom(locations->InAt(1));
2020 Register srcEnd = XRegisterFrom(locations->InAt(2));
2021 Register dstObj = XRegisterFrom(locations->InAt(3));
2022 Register dstBegin = XRegisterFrom(locations->InAt(4));
2023
2024 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2025 Register num_chr = XRegisterFrom(locations->GetTemp(1));
2026 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2027
2028 UseScratchRegisterScope temps(masm);
2029 Register dst_ptr = temps.AcquireX();
2030 Register tmp2 = temps.AcquireX();
2031
2032 vixl::aarch64::Label done;
2033 vixl::aarch64::Label compressed_string_loop;
2034 __ Sub(num_chr, srcEnd, srcBegin);
2035 // Early out for valid zero-length retrievals.
2036 __ Cbz(num_chr, &done);
2037
2038 // dst address to start copying to.
2039 __ Add(dst_ptr, dstObj, Operand(data_offset));
2040 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2041
2042 // src address to copy from.
2043 __ Add(src_ptr, srcObj, Operand(value_offset));
2044 vixl::aarch64::Label compressed_string_preloop;
2045 if (mirror::kUseStringCompression) {
2046 // Location of count in string.
2047 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2048 // String's length.
2049 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2050 __ Tbz(tmp2, 0, &compressed_string_preloop);
2051 }
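// The compression flag is bit 0 of `count` (0 = compressed), so the Tbz above branches to the
// byte-to-char expansion loop for compressed strings and falls through to the 16-bit copy
// otherwise.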
2052 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2053
2054 // Do the copy.
2055 vixl::aarch64::Label loop;
2056 vixl::aarch64::Label remainder;
2057
2058 // Using tmp1 here saves repairing the value of num_chr on the < 8 character path.
2059 __ Subs(tmp1, num_chr, 8);
2060 __ B(lt, &remainder);
2061
2062 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2063 __ Mov(num_chr, tmp1);
2064
2065 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
2066 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2067 __ Bind(&loop);
2068 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2069 __ Subs(num_chr, num_chr, 8);
2070 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2071 __ B(ge, &loop);
2072
2073 __ Adds(num_chr, num_chr, 8);
2074 __ B(eq, &done);
2075
2076 // Main loop for < 8 character case and remainder handling. Loads and stores one
2077 // 16-bit Java character at a time.
2078 __ Bind(&remainder);
2079 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2080 __ Subs(num_chr, num_chr, 1);
2081 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2082 __ B(gt, &remainder);
2083 __ B(&done);
2084
2085 if (mirror::kUseStringCompression) {
2086 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2087 DCHECK_EQ(c_char_size, 1u);
2088 __ Bind(&compressed_string_preloop);
2089 __ Add(src_ptr, src_ptr, Operand(srcBegin));
2090 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2091 __ Bind(&compressed_string_loop);
2092 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2093 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2094 __ Subs(num_chr, num_chr, Operand(1));
2095 __ B(gt, &compressed_string_loop);
2096 }
2097
2098 __ Bind(&done);
2099 }
2100
2101 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2102 // implementation there for longer copy lengths.
2103 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2104
2105 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2106 uint32_t at,
2107 HInstruction* input) {
2108 HIntConstant* const_input = input->AsIntConstant();
2109 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2110 locations->SetInAt(at, Location::RequiresRegister());
2111 } else {
2112 locations->SetInAt(at, Location::RegisterOrConstant(input));
2113 }
2114 }
2115
2116 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2117 // Check to see if we have known failures that will cause us to have to bail out
2118 // to the runtime, and just generate the runtime call directly.
2119 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2120 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2121
2122 // The positions must be non-negative.
2123 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2124 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2125 // We will have to fail anyways.
2126 return;
2127 }
2128
2129 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2130 // native implementation.
2131 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2132 if (length != nullptr) {
2133 int32_t len = length->GetValue();
2134 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2135 // Just call as normal.
2136 return;
2137 }
2138 }
2139
2140 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
2141 LocationSummary* locations = new (allocator) LocationSummary(invoke,
2142 LocationSummary::kCallOnSlowPath,
2143 kIntrinsified);
2144 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2145 locations->SetInAt(0, Location::RequiresRegister());
2146 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2147 locations->SetInAt(2, Location::RequiresRegister());
2148 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2149 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2150
2151 locations->AddTemp(Location::RequiresRegister());
2152 locations->AddTemp(Location::RequiresRegister());
2153 locations->AddTemp(Location::RequiresRegister());
2154 }
2155
2156 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2157 const Location& pos,
2158 const Register& input,
2159 const Location& length,
2160 SlowPathCodeARM64* slow_path,
2161 const Register& temp,
2162 bool length_is_input_length = false) {
2163 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2164 if (pos.IsConstant()) {
2165 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2166 if (pos_const == 0) {
2167 if (!length_is_input_length) {
2168 // Check that length(input) >= length.
2169 __ Ldr(temp, MemOperand(input, length_offset));
2170 __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
2171 __ B(slow_path->GetEntryLabel(), lt);
2172 }
2173 } else {
2174 // Check that length(input) >= pos.
2175 __ Ldr(temp, MemOperand(input, length_offset));
2176 __ Subs(temp, temp, pos_const);
2177 __ B(slow_path->GetEntryLabel(), lt);
2178
2179 // Check that (length(input) - pos) >= length.
2180 __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
2181 __ B(slow_path->GetEntryLabel(), lt);
2182 }
2183 } else if (length_is_input_length) {
2184 // The only way the copy can succeed is if pos is zero.
2185 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2186 } else {
2187 // Check that pos >= 0.
2188 Register pos_reg = WRegisterFrom(pos);
2189 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2190
2191 // Check that pos <= length(input) && (length(input) - pos) >= length.
2192 __ Ldr(temp, MemOperand(input, length_offset));
2193 __ Subs(temp, temp, pos_reg);
2194 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
2195 __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge);
2196 __ B(slow_path->GetEntryLabel(), lt);
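// Note: if the Subs above borrowed (pos > length(input)), the `ge` condition fails and Ccmp
// forces the flags to NFlag (N=1, V=0), so the `lt` branch above is always taken; otherwise
// the flags come from comparing (length(input) - pos) against `length`.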
2197 }
2198 }
2199
2200 // Compute base source address, base destination address, and end
2201 // source address for System.arraycopy* intrinsics in `src_base`,
2202 // `dst_base` and `src_end` respectively.
2203 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2204 Primitive::Type type,
2205 const Register& src,
2206 const Location& src_pos,
2207 const Register& dst,
2208 const Location& dst_pos,
2209 const Location& copy_length,
2210 const Register& src_base,
2211 const Register& dst_base,
2212 const Register& src_end) {
2213 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2214 DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
2215 << "Unexpected element type: " << type;
2216 const int32_t element_size = Primitive::ComponentSize(type);
2217 const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
2218 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2219
2220 if (src_pos.IsConstant()) {
2221 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2222 __ Add(src_base, src, element_size * constant + data_offset);
2223 } else {
2224 __ Add(src_base, src, data_offset);
2225 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2226 }
2227
2228 if (dst_pos.IsConstant()) {
2229 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2230 __ Add(dst_base, dst, element_size * constant + data_offset);
2231 } else {
2232 __ Add(dst_base, dst, data_offset);
2233 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2234 }
2235
2236 if (copy_length.IsConstant()) {
2237 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2238 __ Add(src_end, src_base, element_size * constant);
2239 } else {
2240 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2241 }
2242 }
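// Illustrative example: copying 4 chars from src[3] to dst[0] yields
//   src_base = src + data_offset + 2 * 3,
//   dst_base = dst + data_offset,
//   src_end  = src_base + 2 * 4,
// since element_size is 2 for kPrimChar.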
2243
2244 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2245 MacroAssembler* masm = GetVIXLAssembler();
2246 LocationSummary* locations = invoke->GetLocations();
2247 Register src = XRegisterFrom(locations->InAt(0));
2248 Location src_pos = locations->InAt(1);
2249 Register dst = XRegisterFrom(locations->InAt(2));
2250 Location dst_pos = locations->InAt(3);
2251 Location length = locations->InAt(4);
2252
2253 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2254 codegen_->AddSlowPath(slow_path);
2255
2256 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2257 // copied in reverse and we can't know in all cases if it's needed.
2258 __ Cmp(src, dst);
2259 __ B(slow_path->GetEntryLabel(), eq);
2260
2261 // Bail out if the source is null.
2262 __ Cbz(src, slow_path->GetEntryLabel());
2263
2264 // Bail out if the destination is null.
2265 __ Cbz(dst, slow_path->GetEntryLabel());
2266
2267 if (!length.IsConstant()) {
2268 // Merge the following two comparisons into one:
2269 // If the length is negative, bail out (delegate to libcore's native implementation).
2270 // If the length > 32 then (currently) prefer libcore's native implementation.
2271 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2272 __ B(slow_path->GetEntryLabel(), hi);
2273 } else {
2274 // We have already checked in the LocationsBuilder for the constant case.
2275 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2276 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2277 }
2278
2279 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2280 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2281 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2282
2283 CheckSystemArrayCopyPosition(masm,
2284 src_pos,
2285 src,
2286 length,
2287 slow_path,
2288 src_curr_addr,
2289 false);
2290
2291 CheckSystemArrayCopyPosition(masm,
2292 dst_pos,
2293 dst,
2294 length,
2295 slow_path,
2296 src_curr_addr,
2297 false);
2298
2299 src_curr_addr = src_curr_addr.X();
2300 dst_curr_addr = dst_curr_addr.X();
2301 src_stop_addr = src_stop_addr.X();
2302
2303 GenSystemArrayCopyAddresses(masm,
2304 Primitive::kPrimChar,
2305 src,
2306 src_pos,
2307 dst,
2308 dst_pos,
2309 length,
2310 src_curr_addr,
2311 dst_curr_addr,
2312 src_stop_addr);
2313
2314 // Iterate over the arrays and do a raw copy of the chars.
2315 const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2316 UseScratchRegisterScope temps(masm);
2317 Register tmp = temps.AcquireW();
2318 vixl::aarch64::Label loop, done;
2319 __ Bind(&loop);
2320 __ Cmp(src_curr_addr, src_stop_addr);
2321 __ B(&done, eq);
2322 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2323 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2324 __ B(&loop);
2325 __ Bind(&done);
2326
2327 __ Bind(slow_path->GetExitLabel());
2328 }
2329
2330 // For longer copy lengths we can choose to fall back to the native implementation.
2331 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2332
2333 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2334 // We want to use two temporary registers in order to reduce the register pressure in arm64.
2335 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
2336 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2337 // The only read barrier implementation supporting the
2338 // SystemArrayCopy intrinsic is the Baker-style read barrier.
2339 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2340 return;
2341 }
2342
2343 // Check to see if we have known failures that will cause us to have to bail out
2344 // to the runtime, and just generate the runtime call directly.
2345 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2346 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2347
2348 // The positions must be non-negative.
2349 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2350 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2351 // We will have to fail anyways.
2352 return;
2353 }
2354
2355 // The length must be >= 0.
2356 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2357 if (length != nullptr) {
2358 int32_t len = length->GetValue();
2359 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2360 // Just call as normal.
2361 return;
2362 }
2363 }
2364
2365 SystemArrayCopyOptimizations optimizations(invoke);
2366
2367 if (optimizations.GetDestinationIsSource()) {
2368 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2369 // Within the same array, src_pos < dest_pos would require a backward copy, which we do not emit.
2370 return;
2371 }
2372 }
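// For instance, arraycopy(a, 0, a, 1, n) copies within the same array with src_pos < dest_pos
// and would therefore need a backward copy, so it is left to the runtime implementation.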
2373
2374 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2375 // We currently don't intrinsify primitive copying.
2376 return;
2377 }
2378
2379 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
2380 LocationSummary* locations = new (allocator) LocationSummary(invoke,
2381 LocationSummary::kCallOnSlowPath,
2382 kIntrinsified);
2383 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2384 locations->SetInAt(0, Location::RequiresRegister());
2385 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2386 locations->SetInAt(2, Location::RequiresRegister());
2387 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2388 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2389
2390 locations->AddTemp(Location::RequiresRegister());
2391 locations->AddTemp(Location::RequiresRegister());
2392 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2393 // Temporary register IP0, obtained from the VIXL scratch register
2394 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2395 // (because that register is clobbered by ReadBarrierMarkRegX
2396 // entry points). It cannot be used in calls to
2397 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2398 // either. For these reasons, get a third extra temporary register
2399 // from the register allocator.
2400 locations->AddTemp(Location::RequiresRegister());
2401 } else {
2402 // Cases other than Baker read barriers: the third temporary will
2403 // be acquired from the VIXL scratch register pool.
2404 }
2405 }
2406
2407 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2408 // The only read barrier implementation supporting the
2409 // SystemArrayCopy intrinsic is the Baker-style read barrier.
2410 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2411
2412 MacroAssembler* masm = GetVIXLAssembler();
2413 LocationSummary* locations = invoke->GetLocations();
2414
2415 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2416 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2417 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2418 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2419 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2420
2421 Register src = XRegisterFrom(locations->InAt(0));
2422 Location src_pos = locations->InAt(1);
2423 Register dest = XRegisterFrom(locations->InAt(2));
2424 Location dest_pos = locations->InAt(3);
2425 Location length = locations->InAt(4);
2426 Register temp1 = WRegisterFrom(locations->GetTemp(0));
2427 Location temp1_loc = LocationFrom(temp1);
2428 Register temp2 = WRegisterFrom(locations->GetTemp(1));
2429 Location temp2_loc = LocationFrom(temp2);
2430
2431 SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2432 codegen_->AddSlowPath(intrinsic_slow_path);
2433
2434 vixl::aarch64::Label conditions_on_positions_validated;
2435 SystemArrayCopyOptimizations optimizations(invoke);
2436
2437 // If source and destination are the same, we go to the slow path when the copy would have
2438 // to be performed backward, i.e. when src_pos < dest_pos.
2439 if (src_pos.IsConstant()) {
2440 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2441 if (dest_pos.IsConstant()) {
2442 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2443 if (optimizations.GetDestinationIsSource()) {
2444 // Checked when building locations.
2445 DCHECK_GE(src_pos_constant, dest_pos_constant);
2446 } else if (src_pos_constant < dest_pos_constant) {
2447 __ Cmp(src, dest);
2448 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2449 }
2450 // Checked when building locations.
2451 DCHECK(!optimizations.GetDestinationIsSource()
2452 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2453 } else {
2454 if (!optimizations.GetDestinationIsSource()) {
2455 __ Cmp(src, dest);
2456 __ B(&conditions_on_positions_validated, ne);
2457 }
2458 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2459 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2460 }
2461 } else {
2462 if (!optimizations.GetDestinationIsSource()) {
2463 __ Cmp(src, dest);
2464 __ B(&conditions_on_positions_validated, ne);
2465 }
2466 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2467 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2468 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2469 }
2470
2471 __ Bind(&conditions_on_positions_validated);
2472
2473 if (!optimizations.GetSourceIsNotNull()) {
2474 // Bail out if the source is null.
2475 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2476 }
2477
2478 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2479 // Bail out if the destination is null.
2480 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2481 }
2482
2483 // We have already checked in the LocationsBuilder for the constant case.
2484 if (!length.IsConstant() &&
2485 !optimizations.GetCountIsSourceLength() &&
2486 !optimizations.GetCountIsDestinationLength()) {
2487 // Merge the following two comparisons into one:
2488 // If the length is negative, bail out (delegate to libcore's native implementation).
2489 // If the length >= 128 then (currently) prefer native implementation.
2490 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2491 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2492 }
2493 // Validity checks: source.
2494 CheckSystemArrayCopyPosition(masm,
2495 src_pos,
2496 src,
2497 length,
2498 intrinsic_slow_path,
2499 temp1,
2500 optimizations.GetCountIsSourceLength());
2501
2502 // Validity checks: dest.
2503 CheckSystemArrayCopyPosition(masm,
2504 dest_pos,
2505 dest,
2506 length,
2507 intrinsic_slow_path,
2508 temp1,
2509 optimizations.GetCountIsDestinationLength());
2510 {
2511 // We use a block to end the scratch scope before the write barrier, thus
2512 // freeing the temporary registers so they can be used in `MarkGCCard`.
2513 UseScratchRegisterScope temps(masm);
2514 Location temp3_loc; // Used only for Baker read barrier.
2515 Register temp3;
2516 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2517 temp3_loc = locations->GetTemp(2);
2518 temp3 = WRegisterFrom(temp3_loc);
2519 } else {
2520 temp3 = temps.AcquireW();
2521 }
2522
2523 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2524 // Check whether all elements of the source array are assignable to the component
2525 // type of the destination array. We do two checks: either the classes are the same,
2526 // or the destination is Object[] (so any reference is assignable). If neither check
2527 // succeeds, we go to the slow path.
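// Roughly, ignoring read barriers and heap (un)poisoning, the code below implements:
//
//   if (src->klass_ != dest->klass_) {
//     // Only acceptable if the destination is statically known to be Object[] and its
//     // component type really is java.lang.Object (i.e. has a null super class).
//     if (!dest_is_typed_object_array || dest->klass_->component_type_->super_class_ != null) {
//       goto slow_path;
//     }
//   }
//
// plus, where not already known, "is a non-primitive array" checks on src/dest
// (component_type_ != null and its primitive_type_ == kPrimNot).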
2528
2529 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2530 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2531 // /* HeapReference<Class> */ temp1 = src->klass_
2532 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2533 temp1_loc,
2534 src.W(),
2535 class_offset,
2536 temp3_loc,
2537 /* needs_null_check */ false,
2538 /* use_load_acquire */ false);
2539 // Bail out if the source is not a non-primitive array.
2540 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2541 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2542 temp1_loc,
2543 temp1,
2544 component_offset,
2545 temp3_loc,
2546 /* needs_null_check */ false,
2547 /* use_load_acquire */ false);
2548 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2549 // If heap poisoning is enabled, `temp1` has been unpoisoned
2550 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2551 // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
2552 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2553 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2554 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2555 }
2556
2557 // /* HeapReference<Class> */ temp1 = dest->klass_
2558 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2559 temp1_loc,
2560 dest.W(),
2561 class_offset,
2562 temp3_loc,
2563 /* needs_null_check */ false,
2564 /* use_load_acquire */ false);
2565
2566 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2567 // Bail out if the destination is not a non-primitive array.
2568 //
2569 // Register `temp1` is not trashed by the read barrier emitted
2570 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2571 // method produces a call to a ReadBarrierMarkRegX entry point,
2572 // which saves all potentially live registers, including
2573 // temporaries such as `temp1`.
2574 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2575 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2576 temp2_loc,
2577 temp1,
2578 component_offset,
2579 temp3_loc,
2580 /* needs_null_check */ false,
2581 /* use_load_acquire */ false);
2582 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2583 // If heap poisoning is enabled, `temp2` has been unpoisoned
2584 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2585 // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2586 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2587 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2588 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2589 }
2590
2591 // For the same reason given earlier, `temp1` is not trashed by the
2592 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2593 // /* HeapReference<Class> */ temp2 = src->klass_
2594 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2595 temp2_loc,
2596 src.W(),
2597 class_offset,
2598 temp3_loc,
2599 /* needs_null_check */ false,
2600 /* use_load_acquire */ false);
2601 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2602 __ Cmp(temp1, temp2);
2603
2604 if (optimizations.GetDestinationIsTypedObjectArray()) {
2605 vixl::aarch64::Label do_copy;
2606 __ B(&do_copy, eq);
2607 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2608 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2609 temp1_loc,
2610 temp1,
2611 component_offset,
2612 temp3_loc,
2613 /* needs_null_check */ false,
2614 /* use_load_acquire */ false);
2615 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2616 // We do not need to emit a read barrier for the following
2617 // heap reference load, as `temp1` is only used in a
2618 // comparison with null below, and this reference is not
2619 // kept afterwards.
2620 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2621 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2622 __ Bind(&do_copy);
2623 } else {
2624 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2625 }
2626 } else {
2627 // Non read barrier code.
2628
2629 // /* HeapReference<Class> */ temp1 = dest->klass_
2630 __ Ldr(temp1, MemOperand(dest, class_offset));
2631 // /* HeapReference<Class> */ temp2 = src->klass_
2632 __ Ldr(temp2, MemOperand(src, class_offset));
2633 bool did_unpoison = false;
2634 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2635 !optimizations.GetSourceIsNonPrimitiveArray()) {
2636 // One or two of the references need to be unpoisoned. Unpoison them
2637 // both to make the identity check valid.
2638 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2639 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2640 did_unpoison = true;
2641 }
2642
2643 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2644 // Bail out if the destination is not a non-primitive array.
2645 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2646 __ Ldr(temp3, HeapOperand(temp1, component_offset));
2647 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2648 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2649 // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2650 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2651 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2652 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2653 }
2654
2655 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2656 // Bail out if the source is not a non-primitive array.
2657 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2658 __ Ldr(temp3, HeapOperand(temp2, component_offset));
2659 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2660 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2661 // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2662 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2663 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2664 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2665 }
2666
2667 __ Cmp(temp1, temp2);
2668
2669 if (optimizations.GetDestinationIsTypedObjectArray()) {
2670 vixl::aarch64::Label do_copy;
2671 __ B(&do_copy, eq);
2672 if (!did_unpoison) {
2673 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2674 }
2675 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2676 __ Ldr(temp1, HeapOperand(temp1, component_offset));
2677 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2678 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2679 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2680 // No need to unpoison the result, we're comparing against null.
2681 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2682 __ Bind(&do_copy);
2683 } else {
2684 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2685 }
2686 }
2687 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2688 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2689 // Bail out if the source is not a non-primitive array.
2690 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2691 // /* HeapReference<Class> */ temp1 = src->klass_
2692 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2693 temp1_loc,
2694 src.W(),
2695 class_offset,
2696 temp3_loc,
2697 /* needs_null_check */ false,
2698 /* use_load_acquire */ false);
2699 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2700 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2701 temp2_loc,
2702 temp1,
2703 component_offset,
2704 temp3_loc,
2705 /* needs_null_check */ false,
2706 /* use_load_acquire */ false);
2707 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2708 // If heap poisoning is enabled, `temp2` has been unpoisoned
2709 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2710 } else {
2711 // /* HeapReference<Class> */ temp1 = src->klass_
2712 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2713 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2714 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2715 __ Ldr(temp2, HeapOperand(temp1, component_offset));
2716 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2717 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2718 }
2719 // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2720 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2721 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2722 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2723 }
2724
2725 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2726 // Constant zero length: no need to emit the loop code at all.
2727 } else {
2728 Register src_curr_addr = temp1.X();
2729 Register dst_curr_addr = temp2.X();
2730 Register src_stop_addr = temp3.X();
2731 vixl::aarch64::Label done;
2732 const Primitive::Type type = Primitive::kPrimNot;
2733 const int32_t element_size = Primitive::ComponentSize(type);
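// Note: for kPrimNot, Primitive::ComponentSize() is the size of a heap reference
// (4 bytes), which is why the copy loops below move each element through a W register.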
2734
2735 if (length.IsRegister()) {
2736 // Don't enter the copy loop if the length is zero.
2737 __ Cbz(WRegisterFrom(length), &done);
2738 }
2739
2740 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2741 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2742
2743 // SystemArrayCopy implementation for Baker read barriers (see
2744 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2745 //
2746 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2747 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2748 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2749 // if (is_gray) {
2750 // // Slow-path copy.
2751 // do {
2752 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2753 // } while (src_ptr != end_ptr)
2754 // } else {
2755 // // Fast-path copy.
2756 // do {
2757 // *dest_ptr++ = *src_ptr++;
2758 // } while (src_ptr != end_ptr)
2759 // }
2760
2761 // Make sure `tmp` is not IP0, as it is clobbered by
2762 // ReadBarrierMarkRegX entry points in
2763 // ReadBarrierSystemArrayCopySlowPathARM64.
2764 DCHECK(temps.IsAvailable(ip0));
2765 temps.Exclude(ip0);
2766 Register tmp = temps.AcquireW();
2767 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2768 // Put IP0 back in the pool so that VIXL has at least one
2769 // scratch register available to emit macro-instructions (note
2770 // that IP1 is already used for `tmp`). Indeed some
2771 // macro-instructions used in GenSystemArrayCopyAddresses
2772 // (invoked hereunder) may require a scratch register (for
2773 // instance to emit a load with a large constant offset).
2774 temps.Include(ip0);
2775
2776 // /* int32_t */ monitor = src->monitor_
2777 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2778 // /* LockWord */ lock_word = LockWord(monitor)
2779 static_assert(sizeof(LockWord) == sizeof(int32_t),
2780 "art::LockWord and int32_t have different sizes.");
2781
2782 // Introduce a dependency on the lock_word including rb_state,
2783 // to prevent load-load reordering, and without using
2784 // a memory barrier (which would be more expensive).
2785 // `src` is unchanged by this operation, but its value now depends
2786 // on `tmp`.
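// The 32-bit load into `tmp` zero-extends into tmp.X(), so (tmp.X() LSR 32) is always
// zero: the Add leaves `src` unchanged while still creating a register dependency on
// the lock word load.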
2787 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
2788
2789 // Compute base source address, base destination address, and end
2790 // source address for System.arraycopy* intrinsics in `src_base`,
2791 // `dst_base` and `src_end` respectively.
2792 // Note that `src_curr_addr` is computed from `src` (and
2793 // `src_pos`) here, and thus honors the artificial dependency
2794 // of `src` on `tmp`.
2795 GenSystemArrayCopyAddresses(masm,
2796 type,
2797 src,
2798 src_pos,
2799 dest,
2800 dest_pos,
2801 length,
2802 src_curr_addr,
2803 dst_curr_addr,
2804 src_stop_addr);
2805
2806 // Slow path used to copy array when `src` is gray.
2807 SlowPathCodeARM64* read_barrier_slow_path =
2808 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
2809 codegen_->AddSlowPath(read_barrier_slow_path);
2810
2811 // Given the numeric representation, it's enough to check the low bit of the rb_state.
2812 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2813 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2814 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2815
2816 // Fast-path copy.
2817 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2818 // poison/unpoison.
2819 vixl::aarch64::Label loop;
2820 __ Bind(&loop);
2821 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2822 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2823 __ Cmp(src_curr_addr, src_stop_addr);
2824 __ B(&loop, ne);
2825
2826 __ Bind(read_barrier_slow_path->GetExitLabel());
2827 } else {
2828 // Non read barrier code.
2829 // Compute base source address, base destination address, and end
2830 // source address for System.arraycopy* intrinsics in `src_base`,
2831 // `dst_base` and `src_end` respectively.
2832 GenSystemArrayCopyAddresses(masm,
2833 type,
2834 src,
2835 src_pos,
2836 dest,
2837 dest_pos,
2838 length,
2839 src_curr_addr,
2840 dst_curr_addr,
2841 src_stop_addr);
2842 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2843 // poison/unpoison.
2844 vixl::aarch64::Label loop;
2845 __ Bind(&loop);
2846 {
2847 Register tmp = temps.AcquireW();
2848 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2849 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2850 }
2851 __ Cmp(src_curr_addr, src_stop_addr);
2852 __ B(&loop, ne);
2853 }
2854 __ Bind(&done);
2855 }
2856 }
2857
2858 // We only need one card marking on the destination array.
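// No value register is passed and `value_can_be_null` is false, so the card is marked
// unconditionally; this is the conservative choice given that we do not track the
// individual references that were copied.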
2859 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
2860
2861 __ Bind(intrinsic_slow_path->GetExitLabel());
2862 }
2863
2864 static void GenIsInfinite(LocationSummary* locations,
2865 bool is64bit,
2866 MacroAssembler* masm) {
2867 Operand infinity;
2868 Register out;
2869
2870 if (is64bit) {
2871 infinity = kPositiveInfinityDouble;
2872 out = XRegisterFrom(locations->Out());
2873 } else {
2874 infinity = kPositiveInfinityFloat;
2875 out = WRegisterFrom(locations->Out());
2876 }
2877
2878 const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
2879
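// After the move below, `out` holds the raw bit pattern of the input. A value is
// +/-infinity iff those bits, ignoring the sign bit, match positive infinity; so
// XOR-ing with `infinity` and shifting left by one (dropping the sign bit) yields
// zero exactly for +/-infinity.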
2880 MoveFPToInt(locations, is64bit, masm);
2881 __ Eor(out, out, infinity);
2882 // We don't care about the sign bit, so shift left.
2883 __ Cmp(zero, Operand(out, LSL, 1));
2884 __ Cset(out, eq);
2885 }
2886
2887 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2888 CreateFPToIntLocations(arena_, invoke);
2889 }
2890
2891 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2892 GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
2893 }
2894
2895 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2896 CreateFPToIntLocations(arena_, invoke);
2897 }
2898
2899 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2900 GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
2901 }
2902
2903 void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
2904 if (kEmitCompilerReadBarrier) {
2905 // Do not intrinsify this call with the read barrier configuration.
2906 return;
2907 }
2908 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2909 LocationSummary::kCallOnSlowPath,
2910 kIntrinsified);
2911 locations->SetInAt(0, Location::RequiresRegister());
2912 locations->SetOut(Location::SameAsFirstInput());
2913 locations->AddTemp(Location::RequiresRegister());
2914 }
2915
2916 void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
2917 DCHECK(!kEmitCompilerReadBarrier);
2918 MacroAssembler* masm = GetVIXLAssembler();
2919 LocationSummary* locations = invoke->GetLocations();
2920
2921 Register obj = InputRegisterAt(invoke, 0);
2922 Register out = OutputRegister(invoke);
2923
2924 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
2925 codegen_->AddSlowPath(slow_path);
2926
2927 // Load ArtMethod first.
2928 HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2929 DCHECK(invoke_direct != nullptr);
2930 Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
2931 invoke_direct, locations->GetTemp(0)));
2932
2933 // Now get declaring class.
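// The slow-path and disable-intrinsic flags read below are located at fixed offsets in
// this declaring class (presumably static fields of java.lang.ref.Reference), hence the
// class load.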
2934 __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
2935
2936 uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2937 uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2938 DCHECK_NE(slow_path_flag_offset, 0u);
2939 DCHECK_NE(disable_flag_offset, 0u);
2940 DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2941
2942 // Check the static flags that prevent using the intrinsic.
2943 if (slow_path_flag_offset == disable_flag_offset + 1) {
2944 // Load two adjacent flags in one 64-bit load.
2945 __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
2946 } else {
2947 UseScratchRegisterScope temps(masm);
2948 Register temp1 = temps.AcquireW();
2949 __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
2950 __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
2951 __ Orr(temp0, temp1, temp0);
2952 }
2953 __ Cbnz(temp0, slow_path->GetEntryLabel());
2954
2955 {
2956 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2957 vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2958 // Fast path.
2959 __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
2960 codegen_->MaybeRecordImplicitNullCheck(invoke);
2961 }
2962 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
2963 __ Bind(slow_path->GetExitLabel());
2964 }
2965
2966 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2967 InvokeRuntimeCallingConvention calling_convention;
2968 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2969 invoke,
2970 codegen_,
2971 calling_convention.GetReturnLocation(Primitive::kPrimNot),
2972 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2973 }
2974
2975 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2976 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
2977 LocationSummary* locations = invoke->GetLocations();
2978 MacroAssembler* masm = GetVIXLAssembler();
2979
2980 Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
2981 UseScratchRegisterScope temps(masm);
2982 Register temp = temps.AcquireW();
2983 InvokeRuntimeCallingConvention calling_convention;
2984 Register argument = calling_convention.GetRegisterAt(0);
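// Roughly, Integer.valueOf(v) is:
//   if (v >= IntegerCache.low && v <= IntegerCache.high) return IntegerCache.cache[v - low];
//   return new Integer(v);
// The code below mirrors this: values known to be in the cached range are materialized
// from the boot image, everything else allocates and initializes a new j.l.Integer.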
2985 if (invoke->InputAt(0)->IsConstant()) {
2986 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2987 if (value >= info.low && value <= info.high) {
2988 // Just embed the j.l.Integer in the code.
2989 ScopedObjectAccess soa(Thread::Current());
2990 mirror::Object* boxed = info.cache->Get(value + (-info.low));
2991 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
2992 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
2993 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
2994 } else {
2995 // Allocate and initialize a new j.l.Integer.
2996 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2997 // JIT object table.
2998 uint32_t address =
2999 dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3000 __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
3001 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3002 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3003 __ Mov(temp.W(), value);
3004 __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
3005 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3006 // one.
3007 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3008 }
3009 } else {
3010 Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
3011 // Check bounds of our cache.
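// After subtracting `low`, a single unsigned comparison covers both bounds: values
// below `low` wrap to large unsigned numbers and also take the `hs` branch to `allocate`.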
3012 __ Add(out.W(), in.W(), -info.low);
3013 __ Cmp(out.W(), info.high - info.low + 1);
3014 vixl::aarch64::Label allocate, done;
3015 __ B(&allocate, hs);
3016 // If the value is within the bounds, load the j.l.Integer directly from the array.
3017 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3018 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3019 __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3020 MemOperand source = HeapOperand(
3021 temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
3022 codegen_->Load(Primitive::kPrimNot, out, source);
3023 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3024 __ B(&done);
3025 __ Bind(&allocate);
3026 // Otherwise allocate and initialize a new j.l.Integer.
3027 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3028 __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
3029 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3030 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3031 __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
3032 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3033 // one.
3034 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3035 __ Bind(&done);
3036 }
3037 }
3038
3039 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
3040 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
3041 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
3042 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
3043
3044 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3045 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3046 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3047 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3048 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3049 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
3050 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3051 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3052
3053 // 1.8.
3054 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3055 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3056 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3057 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3058 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3059
3060 UNREACHABLE_INTRINSICS(ARM64)
3061
3062 #undef __
3063
3064 } // namespace arm64
3065 } // namespace art
3066