/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the moment,
  // hence the DataType::Type::kReference check below. Allowing other element
  // types here (e.g. DataType::Type::kUint16) would let it also serve the
  // SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

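  // In effect: base = array + data_offset + pos * element_size, with `pos` taken either
  // as a compile-time constant or as a register scaled by the element size shift.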
  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the moment,
  // hence the DataType::Type::kReference check below. Allowing other element
  // types here (e.g. DataType::Type::kUint16) would let it also serve the
  // SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

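  // In effect: end = base + copy_length * element_size.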
  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
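    // If the high word is non-zero, its CLZ is the answer; otherwise the answer is
    // 32 plus the CLZ of the low word.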
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

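  // ARM has no count-trailing-zeros instruction, so trailing zeros are counted by
  // reversing the bits (RBIT) and then counting leading zeros (CLZ).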
  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* is_far_target= */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
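  // Transfer the FP comparison flags from the FPSCR to the APSR so that the IT block
  // below can execute conditionally on the result of the VCMP.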
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
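  // Load in an order that does not clobber `addr` before the second load when it
  // aliases the low half of the output register pair.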
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          // Piggy-back on the field load path using introspection for the Baker read barrier.
          __ Add(RegisterFrom(temp), base, Operand(offset));
          MemOperand src(RegisterFrom(temp), 0);
          codegen->GenerateFieldLoadWithBakerReadBarrier(
              invoke, trg_loc, base, src, /* needs_null_check= */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
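        // LDREXD provides a single-copy atomic 64-bit load even on cores where a plain
        // LDRD is not guaranteed to be atomic; the exclusive monitor state it sets up is
        // simply left unused, as there is no matching store-exclusive.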
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kInt64) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
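      // LDREXD/STREXD retry loop: the load-exclusive claims the location and the
      // store-exclusive writes 0 to temp_lo only on success, so the loop repeats
      // until the 64-bit store has been performed atomically.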
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* is_far_target= */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kReference) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke) {}

  const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    __ Bind(GetEntryLabel());

    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register base = InputRegisterAt(instruction_, 1);        // Object pointer.
    vixl32::Register offset = LowRegisterFrom(locations->InAt(2));   // Offset (discard high 4B).
    vixl32::Register expected = InputRegisterAt(instruction_, 3);    // Expected.
    vixl32::Register value = InputRegisterAt(instruction_, 4);       // Value.

    vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));  // Pointer to actual memory.
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));      // Temporary.

    // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
    // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the
    // process due to lack of other temps suitable for the read barrier.
    arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
    __ Cmp(tmp_ptr, expected);
    __ B(ne, GetExitLabel());

    // The old value we have read did not match `expected` (which is always a to-space reference)
    // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
    // to-space value matched, so the old value must be a from-space reference to the same object.
    // Do the same CAS loop as the main path but check for both `expected` and the unmarked
    // old value representing the to-space and from-space references for the same object.

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register adjusted_old_value = temps.Acquire();  // For saved `tmp` from main path.

    // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP.
    __ Add(tmp_ptr, base, offset);
    __ Mov(adjusted_old_value, tmp);

    // do {
    //   tmp = [r_ptr] - expected;
    // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value));
    // result = (tmp == 0 || tmp == adjusted_old_value);

    vixl32::Label loop_head;
    __ Bind(&loop_head);
    __ Ldrex(tmp, MemOperand(tmp_ptr));  // This can now load null stored by another thread.
    assembler->MaybeUnpoisonHeapReference(tmp);
    __ Subs(tmp, tmp, expected);         // Use SUBS to get non-zero value if both compares fail.
    {
      // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
      __ it(ne);
      __ cmp(ne, tmp, adjusted_old_value);
    }
    __ B(ne, GetExitLabel());
    assembler->MaybePoisonHeapReference(value);
    __ Strex(tmp, value, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(value);
    __ Cmp(tmp, 0);
    __ B(ne, &loop_head, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }
};

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, DataType::Type::kInt64);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register out = OutputRegister(invoke);                   // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);              // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));   // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);          // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);             // Value.

  vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));  // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));      // Temporary.

  vixl32::Label loop_exit_label;
  vixl32::Label* loop_exit = &loop_exit_label;
  vixl32::Label* failure = &loop_exit_label;

  if (type == DataType::Type::kReference) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // If marking, check if the stored reference is a from-space reference to the same
      // object as the to-space reference `expected`. If so, perform a custom CAS loop.
      BakerReadBarrierCasSlowPathARMVIXL* slow_path =
          new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
      codegen->AddSlowPath(slow_path);
      failure = slow_path->GetEntryLabel();
      loop_exit = slow_path->GetExitLabel();
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STREX
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp == 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);
  __ Ldrex(tmp, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(tmp);
  }
  __ Subs(tmp, tmp, expected);
  static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())->
      B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint);
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(value);
  }
  __ Strex(tmp, value, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(value);
  }
  __ Cmp(tmp, 0);
  __ B(ne, &loop_head, /* is_far_target= */ false);

  __ Bind(loop_exit);

  __ Dmb(vixl32::ISH);

  // out = tmp == 0.
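  // `tmp` is zero iff the CAS succeeded. CLZ yields the register width (32) only for
  // a zero input, so shifting right by log2(32) leaves 1 on success and 0 on failure.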
  __ Clz(out, tmp);
  __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));

  if (type == DataType::Type::kReference) {
    codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, DataType::Type::kReference, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus the temporaries used by the comparison loop.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // An extra temporary register is needed for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

// Forward declaration.
//
// ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
// by the compiler for every C++ function, and if this function gets inlined in
// IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
// build failure. That is the reason why NO_INLINE attribute is used.
static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
                                                  HInvoke* invoke,
                                                  vixl32::Label* end,
                                                  vixl32::Label* different_compression);

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  const vixl32::Register str = InputRegisterAt(invoke, 0);
  const vixl32::Register arg = InputRegisterAt(invoke, 1);
  const vixl32::Register out = OutputRegister(invoke);

  const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
  // which pushes the &end label too far from this branch for it to be 'CBZ-encodable'.
  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);

  if (mirror::kUseStringCompression) {
1243 // Check that both strings use the same compression style before using this comparison loop.
1244 __ Eors(temp2, temp2, temp3);
1245 __ Lsrs(temp2, temp2, 1u);
1246 __ B(cs, &different_compression);
1247 // For string compression, calculate the number of bytes to compare (not chars).
1248 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1249 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
1250
1251 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1252 2 * kMaxInstructionSizeInBytes,
1253 CodeBufferCheckScope::kMaximumSize);
1254
1255 __ it(ne);
1256 __ add(ne, temp0, temp0, temp0);
1257 }
1258
1259
1260 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
1261
1262 __ Bind(&end);
1263
1264 if (can_slow_path) {
1265 __ Bind(slow_path->GetExitLabel());
1266 }
1267 }
1268
1269 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1270 HInvoke* invoke,
1271 vixl32::Label* end,
1272 vixl32::Label* different_compression) {
1273 LocationSummary* locations = invoke->GetLocations();
1274
1275 const vixl32::Register str = InputRegisterAt(invoke, 0);
1276 const vixl32::Register arg = InputRegisterAt(invoke, 1);
1277 const vixl32::Register out = OutputRegister(invoke);
1278
1279 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1280 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1281 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1282 vixl32::Register temp3;
1283 if (mirror::kUseStringCompression) {
1284 temp3 = RegisterFrom(locations->GetTemp(3));
1285 }
1286
1287 vixl32::Label loop;
1288 vixl32::Label find_char_diff;
1289
1290 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1291 // Store offset of string value in preparation for comparison loop.
1292 __ Mov(temp1, value_offset);
1293
1294 // Assertions that must hold in order to compare multiple characters at a time.
1295 CHECK_ALIGNED(value_offset, 8);
1296 static_assert(IsAligned<8>(kObjectAlignment),
1297 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1298
1299 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
1300 DCHECK_EQ(char_size, 2u);
1301
1302 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1303
1304 vixl32::Label find_char_diff_2nd_cmp;
1305 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
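// One iteration of the loop below is roughly (illustrative sketch; Load32 stands for a
// 32-bit LDR from the string data):
//   if (Load32(str + temp1) != Load32(arg + temp1)) goto find_char_diff;
//   temp1 += 4;
//   if (Load32(str + temp1) != Load32(arg + temp1)) goto find_char_diff_2nd_cmp;
//   temp1 += 4;
//   temp0 -= (kUseStringCompression ? 8 /* bytes */ : 4 /* chars */);
//   if (temp0 > 0) goto loop;  // unsigned comparison
//   goto end;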
1306 __ Bind(&loop);
1307 vixl32::Register temp_reg = temps.Acquire();
1308 __ Ldr(temp_reg, MemOperand(str, temp1));
1309 __ Ldr(temp2, MemOperand(arg, temp1));
1310 __ Cmp(temp_reg, temp2);
1311 __ B(ne, &find_char_diff, /* is_far_target= */ false);
1312 __ Add(temp1, temp1, char_size * 2);
1313
1314 __ Ldr(temp_reg, MemOperand(str, temp1));
1315 __ Ldr(temp2, MemOperand(arg, temp1));
1316 __ Cmp(temp_reg, temp2);
1317 __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
1318 __ Add(temp1, temp1, char_size * 2);
1319 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1320 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1321 __ B(hi, &loop, /* is_far_target= */ false);
1322 __ B(end);
1323
1324 __ Bind(&find_char_diff_2nd_cmp);
1325 if (mirror::kUseStringCompression) {
1326 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
1327 __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end?
1328 } else {
1329 // Without string compression, we can start treating temp0 as signed
1330 // and rely on the signed comparison below.
1331 __ Sub(temp0, temp0, 2);
1332 }
1333
1334 // Find the single character difference.
1335 __ Bind(&find_char_diff);
1336 // Get the bit position of the first character that differs.
1337 __ Eor(temp1, temp2, temp_reg);
1338 __ Rbit(temp1, temp1);
1339 __ Clz(temp1, temp1);
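// The EOR/RBIT/CLZ sequence above computes the index of the lowest differing bit of the two
// words, i.e. temp1 = CountTrailingZeros(temp_reg ^ temp2): reversing the bits turns a
// count-leading-zeros into a count-trailing-zeros. Dividing by the character width in bits
// (>> 4 for 16-bit chars, >> 3 for 8-bit bytes) then gives the index of the first differing
// character within the loaded word, which is how temp1 is used below.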
1340
1341 // temp0 = number of characters remaining to compare.
1342 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1343 // in the comparison loop, and after the end of the shorter string data).
1344
1345 // Without string compression, (temp1 >> 4) = the character where the difference occurs between the last
1346 // two words compared, in the interval [0,1].
1347 // (0 for low half-word different, 1 for high half-word different).
1348 // With string compression, (temp1 << 3) = byte where the difference occurs,
1349 // in the interval [0,3].
1350
1351 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1352 // the remaining string data, so just return length diff (out).
1353 // The comparison is unsigned for string compression, otherwise signed.
1354 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1355 __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
1356
1357 // Extract the characters and calculate the difference.
1358 if (mirror::kUseStringCompression) {
1359 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1360 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1361 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1362 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7f800000u
1363 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
1364 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1365 __ Lsr(temp2, temp2, temp1); // Extract second character.
1366 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1367 __ Lsr(out, temp_reg, temp1); // Extract first character.
1368 __ And(temp2, temp2, temp3);
1369 __ And(out, out, temp3);
1370 } else {
1371 __ Bic(temp1, temp1, 0xf);
1372 __ Lsr(temp2, temp2, temp1);
1373 __ Lsr(out, temp_reg, temp1);
1374 __ Movt(temp2, 0);
1375 __ Movt(out, 0);
1376 }
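// Net effect of the bit twiddling above (illustrative): temp1 is rounded down to a character
// boundary (a multiple of 16 or 8 bits), temp3/the MOVTs provide the per-character mask
// (0xffff or 0xff), and each differing character is extracted as (loaded_word >> temp1) & mask
// before the subtraction below.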
1377
1378 __ Sub(out, out, temp2);
1379 temps.Release(temp_reg);
1380
1381 if (mirror::kUseStringCompression) {
1382 __ B(end);
1383 __ Bind(different_compression);
1384
1385 // Comparison for different compression style.
1386 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1387 DCHECK_EQ(c_char_size, 1u);
1388
1389 // We want to free up temp3, currently holding `str.count`, for comparison.
1390 // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1391 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1392 // further down with an LSRS+SBC which will flip the meaning of the flag but allow
1393 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1394 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
1395 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1396 __ Mov(temp1, str);
1397 __ Mov(temp2, arg);
1398 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1399 {
1400 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1401 3 * kMaxInstructionSizeInBytes,
1402 CodeBufferCheckScope::kMaximumSize);
1403 __ itt(cs); // Interleave with selection of temp1 and temp2.
1404 __ mov(cs, temp1, arg); // Preserves flags.
1405 __ mov(cs, temp2, str); // Preserves flags.
1406 }
1407 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
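// Net effect of the ADD/LSRS/SBC dance (illustrative): temp0 becomes 2 * remaining_chars,
// minus one when `str` is the compressed string, so its bottom bit now equals the `arg`
// compression flag while the `subs temp0, #2; bhi` loop below still runs once per character
// of the shorter string.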
1408
1409 // Adjust temp1 and temp2 from string pointers to data pointers.
1410 __ Add(temp1, temp1, value_offset);
1411 __ Add(temp2, temp2, value_offset);
1412
1413 vixl32::Label different_compression_loop;
1414 vixl32::Label different_compression_diff;
1415
1416 // Main loop for different compression.
1417 temp_reg = temps.Acquire();
1418 __ Bind(&different_compression_loop);
1419 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1420 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1421 __ Cmp(temp_reg, temp3);
1422 __ B(ne, &different_compression_diff, /* is_far_target= */ false);
1423 __ Subs(temp0, temp0, 2);
1424 __ B(hi, &different_compression_loop, /* is_far_target= */ false);
1425 __ B(end);
1426
1427 // Calculate the difference.
1428 __ Bind(&different_compression_diff);
1429 __ Sub(out, temp_reg, temp3);
1430 temps.Release(temp_reg);
1431 // Flip the difference if `arg` is compressed.
1432 // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1433 __ Lsrs(temp0, temp0, 1u);
1434 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1435 "Expecting 0=compressed, 1=uncompressed");
1436
1437 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1438 2 * kMaxInstructionSizeInBytes,
1439 CodeBufferCheckScope::kMaximumSize);
1440 __ it(cc);
1441 __ rsb(cc, out, out, 0);
1442 }
1443 }
1444
1445 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1446 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1447 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1448 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1449 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
1450 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
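// For example (illustrative): an all-ASCII literal of up to 16 characters (compressed) or any
// literal of up to 8 characters (uncompressed, 2 bytes per char) falls under this cutoff and is
// compared with the unrolled LDRD/CMP sequence emitted in VisitStringEquals below, instead of
// the generic comparison loop.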
1451
1452 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1453 if (candidate->IsLoadString()) {
1454 HLoadString* load_string = candidate->AsLoadString();
1455 const DexFile& dex_file = load_string->GetDexFile();
1456 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1457 }
1458 return nullptr;
1459 }
1460
1461 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1462 LocationSummary* locations =
1463 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1464 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1465 locations->SetInAt(0, Location::RequiresRegister());
1466 locations->SetInAt(1, Location::RequiresRegister());
1467
1468 // Temporary registers to store lengths of strings and for calculations.
1469 // Using the CBZ instruction requires a low register, so explicitly set a temp to be R0.
1470 locations->AddTemp(LocationFrom(r0));
1471
1472 // For the generic implementation and for long const strings we need an extra temporary.
1473 // We do not need it for short const strings, up to 4 bytes, see code generation below.
1474 uint32_t const_string_length = 0u;
1475 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1476 if (const_string == nullptr) {
1477 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1478 }
1479 bool is_compressed =
1480 mirror::kUseStringCompression &&
1481 const_string != nullptr &&
1482 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1483 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1484 locations->AddTemp(Location::RequiresRegister());
1485 }
1486
1487 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1488 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1489 // Then we shall need an extra temporary register instead of the output register.
1490 locations->SetOut(Location::RequiresRegister());
1491 }
1492
1493 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1494 ArmVIXLAssembler* assembler = GetAssembler();
1495 LocationSummary* locations = invoke->GetLocations();
1496
1497 vixl32::Register str = InputRegisterAt(invoke, 0);
1498 vixl32::Register arg = InputRegisterAt(invoke, 1);
1499 vixl32::Register out = OutputRegister(invoke);
1500
1501 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1502
1503 vixl32::Label loop;
1504 vixl32::Label end;
1505 vixl32::Label return_true;
1506 vixl32::Label return_false;
1507 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1508
1509 // Get offsets of count, value, and class fields within a string object.
1510 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1511 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1512 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1513
1514 // Note that the null check must have been done earlier.
1515 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1516
1517 StringEqualsOptimizations optimizations(invoke);
1518 if (!optimizations.GetArgumentNotNull()) {
1519 // Check if input is null, return false if it is.
1520 __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
1521 }
1522
1523 // Reference equality check, return true if same reference.
1524 __ Cmp(str, arg);
1525 __ B(eq, &return_true, /* is_far_target= */ false);
1526
1527 if (!optimizations.GetArgumentIsString()) {
1528 // Instanceof check for the argument by comparing class fields.
1529 // All string objects must have the same type since String cannot be subclassed.
1530 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1531 // If the argument is a string object, its class field must be equal to receiver's class field.
1532 //
1533 // As the String class is expected to be non-movable, we can read the class
1534 // field from String.equals' arguments without read barriers.
1535 AssertNonMovableStringClass();
1536 // /* HeapReference<Class> */ temp = str->klass_
1537 __ Ldr(temp, MemOperand(str, class_offset));
1538 // /* HeapReference<Class> */ out = arg->klass_
1539 __ Ldr(out, MemOperand(arg, class_offset));
1540 // Also, because we use the previously loaded class references only in the
1541 // following comparison, we don't need to unpoison them.
1542 __ Cmp(temp, out);
1543 __ B(ne, &return_false, /* is_far_target= */ false);
1544 }
1545
1546 // Check if one of the inputs is a const string. Do not special-case both strings
1547 // being const; such cases should be handled by constant folding if needed.
1548 uint32_t const_string_length = 0u;
1549 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1550 if (const_string == nullptr) {
1551 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1552 if (const_string != nullptr) {
1553 std::swap(str, arg); // Make sure the const string is in `str`.
1554 }
1555 }
1556 bool is_compressed =
1557 mirror::kUseStringCompression &&
1558 const_string != nullptr &&
1559 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1560
1561 if (const_string != nullptr) {
1562 // Load `count` field of the argument string and check if it matches the const string.
1563 // This also compares the compression style; if it differs, return false.
1564 __ Ldr(temp, MemOperand(arg, count_offset));
1565 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1566 __ B(ne, &return_false, /* is_far_target= */ false);
1567 } else {
1568 // Load `count` fields of this and argument strings.
1569 __ Ldr(temp, MemOperand(str, count_offset));
1570 __ Ldr(out, MemOperand(arg, count_offset));
1571 // Check if `count` fields are equal, return false if they're not.
1572 // This also compares the compression style; if it differs, return false.
1573 __ Cmp(temp, out);
1574 __ B(ne, &return_false, /* is_far_target= */ false);
1575 }
1576
1577 // Assertions that must hold in order to compare strings 4 bytes at a time.
1578 // Ok to do this because strings are zero-padded to kObjectAlignment.
1579 DCHECK_ALIGNED(value_offset, 4);
1580 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1581
1582 if (const_string != nullptr &&
1583 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1584 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1585 // Load and compare the contents. Though we know the contents of the short const string
1586 // at compile time, materializing constants may be more code than loading from memory.
1587 int32_t offset = value_offset;
1588 size_t remaining_bytes =
1589 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1590 while (remaining_bytes > sizeof(uint32_t)) {
1591 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1592 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1593 vixl32::Register temp2 = scratch_scope.Acquire();
1594 __ Ldrd(temp, temp1, MemOperand(str, offset));
1595 __ Ldrd(temp2, out, MemOperand(arg, offset));
1596 __ Cmp(temp, temp2);
1597 __ B(ne, &return_false, /* is_far_target= */ false);
1598 __ Cmp(temp1, out);
1599 __ B(ne, &return_false, /* is_far_target= */ false);
1600 offset += 2u * sizeof(uint32_t);
1601 remaining_bytes -= 2u * sizeof(uint32_t);
1602 }
1603 if (remaining_bytes != 0u) {
1604 __ Ldr(temp, MemOperand(str, offset));
1605 __ Ldr(out, MemOperand(arg, offset));
1606 __ Cmp(temp, out);
1607 __ B(ne, &return_false, /* is_far_target= */ false);
1608 }
1609 } else {
1610 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1611 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1612 "Expecting 0=compressed, 1=uncompressed");
1613 __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1614
1615 if (mirror::kUseStringCompression) {
1616 // For string compression, calculate the number of bytes to compare (not chars).
1617 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1618 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1619 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1620 2 * kMaxInstructionSizeInBytes,
1621 CodeBufferCheckScope::kMaximumSize);
1622 __ it(cs); // If uncompressed,
1623 __ add(cs, temp, temp, temp); // double the byte count.
1624 }
1625
1626 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1627 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1628 vixl32::Register temp2 = scratch_scope.Acquire();
1629
1630 // Store offset of string value in preparation for comparison loop.
1631 __ Mov(temp1, value_offset);
1632
1633 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1634 __ Bind(&loop);
1635 __ Ldr(out, MemOperand(str, temp1));
1636 __ Ldr(temp2, MemOperand(arg, temp1));
1637 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1638 __ Cmp(out, temp2);
1639 __ B(ne, &return_false, /* is_far_target= */ false);
1640 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1641 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1642 __ B(hi, &loop, /* is_far_target= */ false);
1643 }
1644
1645 // Return true and exit the function.
1646 // If loop does not result in returning false, we return true.
1647 __ Bind(&return_true);
1648 __ Mov(out, 1);
1649 __ B(final_label);
1650
1651 // Return false and exit the function.
1652 __ Bind(&return_false);
1653 __ Mov(out, 0);
1654
1655 if (end.IsReferenced()) {
1656 __ Bind(&end);
1657 }
1658 }
1659
1660 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1661 ArmVIXLAssembler* assembler,
1662 CodeGeneratorARMVIXL* codegen,
1663 bool start_at_zero) {
1664 LocationSummary* locations = invoke->GetLocations();
1665
1666 // Note that the null check must have been done earlier.
1667 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1668
1669 // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
1670 // dispatch directly for a large constant, or omit the slow path for a small constant or a char.
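// Illustrative decision table for the dispatch below:
//   constant code point > 0xFFFF      -> unconditionally branch to the slow path (supplementary
//                                        code points need the full java.lang.String logic);
//   non-constant, non-char code point -> emit a runtime `>= 0x10000` check, slow path if it hits;
//   char (kUint16) code point         -> no check needed, the value always fits in 16 bits.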
1671 SlowPathCodeARMVIXL* slow_path = nullptr;
1672 HInstruction* code_point = invoke->InputAt(1);
1673 if (code_point->IsIntConstant()) {
1674 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1675 std::numeric_limits<uint16_t>::max()) {
1676 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1677 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1678 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1679 codegen->AddSlowPath(slow_path);
1680 __ B(slow_path->GetEntryLabel());
1681 __ Bind(slow_path->GetExitLabel());
1682 return;
1683 }
1684 } else if (code_point->GetType() != DataType::Type::kUint16) {
1685 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1686 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1687 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1688 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1689 codegen->AddSlowPath(slow_path);
1690 __ B(hs, slow_path->GetEntryLabel());
1691 }
1692
1693 if (start_at_zero) {
1694 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1695 DCHECK(tmp_reg.Is(r2));
1696 // Start-index = 0.
1697 __ Mov(tmp_reg, 0);
1698 }
1699
1700 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1701 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1702
1703 if (slow_path != nullptr) {
1704 __ Bind(slow_path->GetExitLabel());
1705 }
1706 }
1707
1708 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1709 LocationSummary* locations = new (allocator_) LocationSummary(
1710 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1711 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1712 // best to align the inputs accordingly.
1713 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1714 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1715 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1716 locations->SetOut(LocationFrom(r0));
1717
1718 // Need to send start-index=0.
1719 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1720 }
1721
1722 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1723 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1724 }
1725
1726 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1727 LocationSummary* locations = new (allocator_) LocationSummary(
1728 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1729 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1730 // best to align the inputs accordingly.
1731 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1732 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1733 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1734 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1735 locations->SetOut(LocationFrom(r0));
1736 }
1737
1738 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1739 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1740 }
1741
1742 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1743 LocationSummary* locations = new (allocator_) LocationSummary(
1744 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1745 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1746 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1747 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1748 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1749 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1750 locations->SetOut(LocationFrom(r0));
1751 }
1752
1753 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1754 ArmVIXLAssembler* assembler = GetAssembler();
1755 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1756 __ Cmp(byte_array, 0);
1757 SlowPathCodeARMVIXL* slow_path =
1758 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1759 codegen_->AddSlowPath(slow_path);
1760 __ B(eq, slow_path->GetEntryLabel());
1761
1762 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1763 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1764 __ Bind(slow_path->GetExitLabel());
1765 }
1766
1767 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1768 LocationSummary* locations =
1769 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1770 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1771 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1772 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1773 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1774 locations->SetOut(LocationFrom(r0));
1775 }
1776
1777 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1778 // No need to emit code checking whether `locations->InAt(2)` is a null
1779 // pointer, as callers of the native method
1780 //
1781 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1782 //
1783 // all include a null check on `data` before calling that method.
1784 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1785 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1786 }
1787
1788 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1789 LocationSummary* locations = new (allocator_) LocationSummary(
1790 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1791 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1792 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1793 locations->SetOut(LocationFrom(r0));
1794 }
1795
1796 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1797 ArmVIXLAssembler* assembler = GetAssembler();
1798 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1799 __ Cmp(string_to_copy, 0);
1800 SlowPathCodeARMVIXL* slow_path =
1801 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1802 codegen_->AddSlowPath(slow_path);
1803 __ B(eq, slow_path->GetEntryLabel());
1804
1805 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1806 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1807
1808 __ Bind(slow_path->GetExitLabel());
1809 }
1810
1811 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1812 // The only read barrier implementation supporting the
1813 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1814 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1815 return;
1816 }
1817
1818 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1819 LocationSummary* locations = invoke->GetLocations();
1820 if (locations == nullptr) {
1821 return;
1822 }
1823
1824 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1825 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1826 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1827
1828 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1829 locations->SetInAt(1, Location::RequiresRegister());
1830 }
1831 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1832 locations->SetInAt(3, Location::RequiresRegister());
1833 }
1834 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1835 locations->SetInAt(4, Location::RequiresRegister());
1836 }
1837 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1838 // Temporary register IP cannot be used in
1839 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1840 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1841 // temporary register from the register allocator.
1842 locations->AddTemp(Location::RequiresRegister());
1843 }
1844 }
1845
1846 static void CheckPosition(ArmVIXLAssembler* assembler,
1847 Location pos,
1848 vixl32::Register input,
1849 Location length,
1850 SlowPathCodeARMVIXL* slow_path,
1851 vixl32::Register temp,
1852 bool length_is_input_length = false) {
1853 // Where is the length in the Array?
1854 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
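// Illustrative summary of the range checks emitted below (any failure branches to `slow_path`):
//   constant pos == 0:  check length(input) >= length          (skipped if length_is_input_length)
//   constant pos != 0:  check length(input) >= pos and length(input) - pos >= length
//   register pos, length_is_input_length:  check pos == 0
//   register pos, otherwise:  check pos >= 0, length(input) >= pos, length(input) - pos >= length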
1855
1856 if (pos.IsConstant()) {
1857 int32_t pos_const = Int32ConstantFrom(pos);
1858 if (pos_const == 0) {
1859 if (!length_is_input_length) {
1860 // Check that length(input) >= length.
1861 __ Ldr(temp, MemOperand(input, length_offset));
1862 if (length.IsConstant()) {
1863 __ Cmp(temp, Int32ConstantFrom(length));
1864 } else {
1865 __ Cmp(temp, RegisterFrom(length));
1866 }
1867 __ B(lt, slow_path->GetEntryLabel());
1868 }
1869 } else {
1870 // Check that length(input) >= pos.
1871 __ Ldr(temp, MemOperand(input, length_offset));
1872 __ Subs(temp, temp, pos_const);
1873 __ B(lt, slow_path->GetEntryLabel());
1874
1875 // Check that (length(input) - pos) >= length.
1876 if (length.IsConstant()) {
1877 __ Cmp(temp, Int32ConstantFrom(length));
1878 } else {
1879 __ Cmp(temp, RegisterFrom(length));
1880 }
1881 __ B(lt, slow_path->GetEntryLabel());
1882 }
1883 } else if (length_is_input_length) {
1884 // The only way the copy can succeed is if pos is zero.
1885 vixl32::Register pos_reg = RegisterFrom(pos);
1886 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1887 } else {
1888 // Check that pos >= 0.
1889 vixl32::Register pos_reg = RegisterFrom(pos);
1890 __ Cmp(pos_reg, 0);
1891 __ B(lt, slow_path->GetEntryLabel());
1892
1893 // Check that pos <= length(input).
1894 __ Ldr(temp, MemOperand(input, length_offset));
1895 __ Subs(temp, temp, pos_reg);
1896 __ B(lt, slow_path->GetEntryLabel());
1897
1898 // Check that (length(input) - pos) >= length.
1899 if (length.IsConstant()) {
1900 __ Cmp(temp, Int32ConstantFrom(length));
1901 } else {
1902 __ Cmp(temp, RegisterFrom(length));
1903 }
1904 __ B(lt, slow_path->GetEntryLabel());
1905 }
1906 }
1907
1908 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1909 // The only read barrier implementation supporting the
1910 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1911 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1912
1913 ArmVIXLAssembler* assembler = GetAssembler();
1914 LocationSummary* locations = invoke->GetLocations();
1915
1916 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1917 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1918 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1919 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1920 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1921
1922 vixl32::Register src = InputRegisterAt(invoke, 0);
1923 Location src_pos = locations->InAt(1);
1924 vixl32::Register dest = InputRegisterAt(invoke, 2);
1925 Location dest_pos = locations->InAt(3);
1926 Location length = locations->InAt(4);
1927 Location temp1_loc = locations->GetTemp(0);
1928 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1929 Location temp2_loc = locations->GetTemp(1);
1930 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1931 Location temp3_loc = locations->GetTemp(2);
1932 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1933
1934 SlowPathCodeARMVIXL* intrinsic_slow_path =
1935 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1936 codegen_->AddSlowPath(intrinsic_slow_path);
1937
1938 vixl32::Label conditions_on_positions_validated;
1939 SystemArrayCopyOptimizations optimizations(invoke);
1940
1941 // If source and destination are the same, we go to the slow path if we need to do
1942 // forward copying.
1943 if (src_pos.IsConstant()) {
1944 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1945 if (dest_pos.IsConstant()) {
1946 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1947 if (optimizations.GetDestinationIsSource()) {
1948 // Checked when building locations.
1949 DCHECK_GE(src_pos_constant, dest_pos_constant);
1950 } else if (src_pos_constant < dest_pos_constant) {
1951 __ Cmp(src, dest);
1952 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1953 }
1954
1955 // Checked when building locations.
1956 DCHECK(!optimizations.GetDestinationIsSource()
1957 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1958 } else {
1959 if (!optimizations.GetDestinationIsSource()) {
1960 __ Cmp(src, dest);
1961 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1962 }
1963 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1964 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1965 }
1966 } else {
1967 if (!optimizations.GetDestinationIsSource()) {
1968 __ Cmp(src, dest);
1969 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1970 }
1971 if (dest_pos.IsConstant()) {
1972 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1973 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1974 } else {
1975 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1976 }
1977 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1978 }
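// In short (illustrative): the slow path is taken when the arrays may be the same object and
// src_pos < dest_pos, because the forward copy emitted below would then overwrite source
// elements before reading them.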
1979
1980 __ Bind(&conditions_on_positions_validated);
1981
1982 if (!optimizations.GetSourceIsNotNull()) {
1983 // Bail out if the source is null.
1984 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1985 }
1986
1987 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1988 // Bail out if the destination is null.
1989 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1990 }
1991
1992 // If the length is negative, bail out.
1993 // We have already checked in the LocationsBuilder for the constant case.
1994 if (!length.IsConstant() &&
1995 !optimizations.GetCountIsSourceLength() &&
1996 !optimizations.GetCountIsDestinationLength()) {
1997 __ Cmp(RegisterFrom(length), 0);
1998 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1999 }
2000
2001 // Validity checks: source.
2002 CheckPosition(assembler,
2003 src_pos,
2004 src,
2005 length,
2006 intrinsic_slow_path,
2007 temp1,
2008 optimizations.GetCountIsSourceLength());
2009
2010 // Validity checks: dest.
2011 CheckPosition(assembler,
2012 dest_pos,
2013 dest,
2014 length,
2015 intrinsic_slow_path,
2016 temp1,
2017 optimizations.GetCountIsDestinationLength());
2018
2019 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2020 // Check whether all elements of the source array are assignable to the component
2021 // type of the destination array. We do two checks: the classes are the same,
2022 // or the destination is Object[]. If none of these checks succeed, we go to the
2023 // slow path.
2024
2025 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2026 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2027 // /* HeapReference<Class> */ temp1 = src->klass_
2028 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2029 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
2030 // Bail out if the source is not a non primitive array.
2031 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2032 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2033 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2034 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2035 // If heap poisoning is enabled, `temp1` has been unpoisoned
2036 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2037 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2038 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2039 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2040 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2041 }
2042
2043 // /* HeapReference<Class> */ temp1 = dest->klass_
2044 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2045 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
2046
2047 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2048 // Bail out if the destination is not a non primitive array.
2049 //
2050 // Register `temp1` is not trashed by the read barrier emitted
2051 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2052 // method produces a call to a ReadBarrierMarkRegX entry point,
2053 // which saves all potentially live registers, including
2054 // temporaries such as `temp1`.
2055 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2056 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2057 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
2058 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2059 // If heap poisoning is enabled, `temp2` has been unpoisoned
2060 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2061 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2062 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2063 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2064 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2065 }
2066
2067 // For the same reason given earlier, `temp1` is not trashed by the
2068 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2069 // /* HeapReference<Class> */ temp2 = src->klass_
2070 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2071 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
2072 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2073 __ Cmp(temp1, temp2);
2074
2075 if (optimizations.GetDestinationIsTypedObjectArray()) {
2076 vixl32::Label do_copy;
2077 __ B(eq, &do_copy, /* is_far_target= */ false);
2078 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2079 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2080 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2081 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2082 // We do not need to emit a read barrier for the following
2083 // heap reference load, as `temp1` is only used in a
2084 // comparison with null below, and this reference is not
2085 // kept afterwards.
2086 __ Ldr(temp1, MemOperand(temp1, super_offset));
2087 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2088 __ Bind(&do_copy);
2089 } else {
2090 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2091 }
2092 } else {
2093 // Non read barrier code.
2094
2095 // /* HeapReference<Class> */ temp1 = dest->klass_
2096 __ Ldr(temp1, MemOperand(dest, class_offset));
2097 // /* HeapReference<Class> */ temp2 = src->klass_
2098 __ Ldr(temp2, MemOperand(src, class_offset));
2099 bool did_unpoison = false;
2100 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2101 !optimizations.GetSourceIsNonPrimitiveArray()) {
2102 // One or two of the references need to be unpoisoned. Unpoison them
2103 // both to make the identity check valid.
2104 assembler->MaybeUnpoisonHeapReference(temp1);
2105 assembler->MaybeUnpoisonHeapReference(temp2);
2106 did_unpoison = true;
2107 }
2108
2109 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2110 // Bail out if the destination is not a non primitive array.
2111 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2112 __ Ldr(temp3, MemOperand(temp1, component_offset));
2113 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2114 assembler->MaybeUnpoisonHeapReference(temp3);
2115 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2116 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2117 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2118 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2119 }
2120
2121 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2122 // Bail out if the source is not a non primitive array.
2123 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2124 __ Ldr(temp3, MemOperand(temp2, component_offset));
2125 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2126 assembler->MaybeUnpoisonHeapReference(temp3);
2127 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2128 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2129 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2130 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2131 }
2132
2133 __ Cmp(temp1, temp2);
2134
2135 if (optimizations.GetDestinationIsTypedObjectArray()) {
2136 vixl32::Label do_copy;
2137 __ B(eq, &do_copy, /* is_far_target= */ false);
2138 if (!did_unpoison) {
2139 assembler->MaybeUnpoisonHeapReference(temp1);
2140 }
2141 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2142 __ Ldr(temp1, MemOperand(temp1, component_offset));
2143 assembler->MaybeUnpoisonHeapReference(temp1);
2144 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2145 __ Ldr(temp1, MemOperand(temp1, super_offset));
2146 // No need to unpoison the result, we're comparing against null.
2147 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2148 __ Bind(&do_copy);
2149 } else {
2150 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2151 }
2152 }
2153 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2154 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2155 // Bail out if the source is not a non primitive array.
2156 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2157 // /* HeapReference<Class> */ temp1 = src->klass_
2158 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2159 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
2160 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2161 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2162 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2163 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2164 // If heap poisoning is enabled, `temp3` has been unpoisoned
2165 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2166 } else {
2167 // /* HeapReference<Class> */ temp1 = src->klass_
2168 __ Ldr(temp1, MemOperand(src, class_offset));
2169 assembler->MaybeUnpoisonHeapReference(temp1);
2170 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2171 __ Ldr(temp3, MemOperand(temp1, component_offset));
2172 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2173 assembler->MaybeUnpoisonHeapReference(temp3);
2174 }
2175 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2176 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2177 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2178 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2179 }
2180
2181 if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2182 // Constant zero length: no need to emit the loop code at all.
2183 } else {
2184 vixl32::Label done;
2185 const DataType::Type type = DataType::Type::kReference;
2186 const int32_t element_size = DataType::Size(type);
2187
2188 if (length.IsRegister()) {
2189 // Don't enter the copy loop if the length is zero.
2190 __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
2191 }
2192
2193 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2194 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2195
2196 // SystemArrayCopy implementation for Baker read barriers (see
2197 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2198 //
2199 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2200 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2201 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2202 // if (is_gray) {
2203 // // Slow-path copy.
2204 // do {
2205 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2206 // } while (src_ptr != end_ptr)
2207 // } else {
2208 // // Fast-path copy.
2209 // do {
2210 // *dest_ptr++ = *src_ptr++;
2211 // } while (src_ptr != end_ptr)
2212 // }
2213
2214 // /* int32_t */ monitor = src->monitor_
2215 __ Ldr(temp2, MemOperand(src, monitor_offset));
2216 // /* LockWord */ lock_word = LockWord(monitor)
2217 static_assert(sizeof(LockWord) == sizeof(int32_t),
2218 "art::LockWord and int32_t have different sizes.");
2219
2220 // Introduce a dependency on the lock_word including the rb_state,
2221 // which shall prevent load-load reordering without using
2222 // a memory barrier (which would be more expensive).
2223 // `src` is unchanged by this operation, but its value now depends
2224 // on `temp2`.
2225 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
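// (An immediate LSR #32 yields 0 on AArch32, so the ADD above adds zero; it is the register
// read of `temp2` that creates the artificial dependency.)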
2226
2227 // Compute the base source address in `temp1`.
2228 // Note that `temp1` (the base source address) is computed from
2229 // `src` (and `src_pos`) here, and thus honors the artificial
2230 // dependency of `src` on `temp2`.
2231 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2232 // Compute the end source address in `temp3`.
2233 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2234 // The base destination address is computed later, as `temp2` is
2235 // used for intermediate computations.
2236
2237 // Slow path used to copy array when `src` is gray.
2238 // Note that the base destination address is computed in `temp2`
2239 // by the slow path code.
2240 SlowPathCodeARMVIXL* read_barrier_slow_path =
2241 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2242 codegen_->AddSlowPath(read_barrier_slow_path);
2243
2244 // Given the numeric representation, it's enough to check the low bit of the
2245 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2246 // which can be a 16-bit instruction unlike the TST immediate.
2247 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2248 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2249 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2250 // Carry flag is the last bit shifted out by LSRS.
2251 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2252
2253 // Fast-path copy.
2254 // Compute the base destination address in `temp2`.
2255 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2256 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2257 // poison/unpoison.
2258 vixl32::Label loop;
2259 __ Bind(&loop);
2260 {
2261 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2262 const vixl32::Register temp_reg = temps.Acquire();
2263 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2264 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2265 }
2266 __ Cmp(temp1, temp3);
2267 __ B(ne, &loop, /* is_far_target= */ false);
2268
2269 __ Bind(read_barrier_slow_path->GetExitLabel());
2270 } else {
2271 // Non read barrier code.
2272 // Compute the base source address in `temp1`.
2273 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2274 // Compute the base destination address in `temp2`.
2275 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2276 // Compute the end source address in `temp3`.
2277 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2278 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2279 // poison/unpoison.
2280 vixl32::Label loop;
2281 __ Bind(&loop);
2282 {
2283 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2284 const vixl32::Register temp_reg = temps.Acquire();
2285 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2286 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2287 }
2288 __ Cmp(temp1, temp3);
2289 __ B(ne, &loop, /* is_far_target= */ false);
2290 }
2291 __ Bind(&done);
2292 }
2293
2294 // We only need one card marking on the destination array.
2295 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false);
2296
2297 __ Bind(intrinsic_slow_path->GetExitLabel());
2298 }
2299
2300 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2301 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2302 // the code generator. Furthermore, the register allocator creates fixed live intervals
2303 // for all caller-saved registers because we are doing a function call. As a result, if
2304 // the input and output locations are unallocated, the register allocator runs out of
2305 // registers and fails; however, a debuggable graph is not the common case.
2306 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2307 return;
2308 }
2309
2310 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2311 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2312 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2313
2314 LocationSummary* const locations =
2315 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2316 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2317
2318 locations->SetInAt(0, Location::RequiresFpuRegister());
2319 locations->SetOut(Location::RequiresFpuRegister());
2320 // Native code uses the soft float ABI.
2321 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2322 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2323 }
2324
2325 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2326 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2327 // the code generator. Furthermore, the register allocator creates fixed live intervals
2328 // for all caller-saved registers because we are doing a function call. As a result, if
2329 // the input and output locations are unallocated, the register allocator runs out of
2330 // registers and fails; however, a debuggable graph is not the common case.
2331 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2332 return;
2333 }
2334
2335 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2336 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2337 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2338 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2339
2340 LocationSummary* const locations =
2341 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2342 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2343
2344 locations->SetInAt(0, Location::RequiresFpuRegister());
2345 locations->SetInAt(1, Location::RequiresFpuRegister());
2346 locations->SetOut(Location::RequiresFpuRegister());
2347 // Native code uses the soft float ABI.
2348 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2349 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2350 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2351 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2352 }
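// Illustrative sketch of the marshalling performed by GenFPToFPCall/GenFPFPToFPCall below,
// assuming the soft-float calling convention where each double travels in a core register pair
// (the temps reserved above are those pairs, typically (r0, r1) and (r2, r3)):
//   (r0, r1) = arg0 D register      // VMOV core pair <- D register
//   (r2, r3) = arg1 D register      // two-argument variant only
//   call the quick entrypoint       // e.g. kQuickCos, kQuickPow
//   result D register = (r0, r1)    // VMOV D register <- core pair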
2353
2354 static void GenFPToFPCall(HInvoke* invoke,
2355 ArmVIXLAssembler* assembler,
2356 CodeGeneratorARMVIXL* codegen,
2357 QuickEntrypointEnum entry) {
2358 LocationSummary* const locations = invoke->GetLocations();
2359
2360 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2361 DCHECK(locations->WillCall() && locations->Intrinsified());
2362
2363 // Native code uses the soft float ABI.
2364 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2365 RegisterFrom(locations->GetTemp(1)),
2366 InputDRegisterAt(invoke, 0));
2367 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2368 __ Vmov(OutputDRegister(invoke),
2369 RegisterFrom(locations->GetTemp(0)),
2370 RegisterFrom(locations->GetTemp(1)));
2371 }
2372
2373 static void GenFPFPToFPCall(HInvoke* invoke,
2374 ArmVIXLAssembler* assembler,
2375 CodeGeneratorARMVIXL* codegen,
2376 QuickEntrypointEnum entry) {
2377 LocationSummary* const locations = invoke->GetLocations();
2378
2379 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2380 DCHECK(locations->WillCall() && locations->Intrinsified());
2381
2382 // Native code uses the soft float ABI.
2383 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2384 RegisterFrom(locations->GetTemp(1)),
2385 InputDRegisterAt(invoke, 0));
2386 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2387 RegisterFrom(locations->GetTemp(3)),
2388 InputDRegisterAt(invoke, 1));
2389 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2390 __ Vmov(OutputDRegister(invoke),
2391 RegisterFrom(locations->GetTemp(0)),
2392 RegisterFrom(locations->GetTemp(1)));
2393 }
2394
2395 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2396 CreateFPToFPCallLocations(allocator_, invoke);
2397 }
2398
2399 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2400 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2401 }
2402
2403 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2404 CreateFPToFPCallLocations(allocator_, invoke);
2405 }
2406
2407 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2408 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2409 }
2410
2411 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2412 CreateFPToFPCallLocations(allocator_, invoke);
2413 }
2414
2415 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2416 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2417 }
2418
2419 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2420 CreateFPToFPCallLocations(allocator_, invoke);
2421 }
2422
2423 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2424 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2425 }
2426
2427 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2428 CreateFPToFPCallLocations(allocator_, invoke);
2429 }
2430
2431 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2432 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2433 }
2434
2435 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2436 CreateFPToFPCallLocations(allocator_, invoke);
2437 }
2438
2439 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2440 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2441 }
2442
2443 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2444 CreateFPToFPCallLocations(allocator_, invoke);
2445 }
2446
2447 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2448 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2449 }
2450
2451 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2452 CreateFPToFPCallLocations(allocator_, invoke);
2453 }
2454
2455 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2456 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2457 }
2458
2459 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2460 CreateFPToFPCallLocations(allocator_, invoke);
2461 }
2462
2463 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2464 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2465 }
2466
2467 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2468 CreateFPToFPCallLocations(allocator_, invoke);
2469 }
2470
2471 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2472 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2473 }
2474
2475 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2476 CreateFPToFPCallLocations(allocator_, invoke);
2477 }
2478
2479 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2480 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2481 }
2482
2483 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2484 CreateFPToFPCallLocations(allocator_, invoke);
2485 }
2486
2487 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2488 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2489 }
2490
2491 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2492 CreateFPToFPCallLocations(allocator_, invoke);
2493 }
2494
2495 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2496 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2497 }
2498
2499 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2500 CreateFPToFPCallLocations(allocator_, invoke);
2501 }
2502
2503 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2504 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2505 }
2506
2507 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2508 CreateFPFPToFPCallLocations(allocator_, invoke);
2509 }
2510
2511 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2512 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2513 }
2514
2515 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
2516 CreateFPFPToFPCallLocations(allocator_, invoke);
2517 }
2518
2519 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
2520 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
2521 }
2522
2523 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2524 CreateFPFPToFPCallLocations(allocator_, invoke);
2525 }
2526
2527 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2528 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2529 }
2530
2531 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2532 CreateFPFPToFPCallLocations(allocator_, invoke);
2533 }
2534
2535 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2536 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2537 }
2538
2539 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2540 CreateIntToIntLocations(allocator_, invoke);
2541 }
2542
2543 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2544 ArmVIXLAssembler* assembler = GetAssembler();
2545 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2546 }
2547
2548 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2549 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2550 }
2551
2552 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2553 ArmVIXLAssembler* assembler = GetAssembler();
2554 LocationSummary* locations = invoke->GetLocations();
2555
2556 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2557 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2558 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2559 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2560
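  // A 64-bit bit reversal reverses each 32-bit half and swaps the halves.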
2561 __ Rbit(out_reg_lo, in_reg_hi);
2562 __ Rbit(out_reg_hi, in_reg_lo);
2563 }
2564
2565 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2566 CreateIntToIntLocations(allocator_, invoke);
2567 }
2568
2569 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2570 ArmVIXLAssembler* assembler = GetAssembler();
2571 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2572 }
2573
2574 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2575 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2576 }
2577
2578 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2579 ArmVIXLAssembler* assembler = GetAssembler();
2580 LocationSummary* locations = invoke->GetLocations();
2581
2582 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2583 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2584 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2585 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2586
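  // A 64-bit byte reversal reverses the bytes of each 32-bit half and swaps the halves.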
2587 __ Rev(out_reg_lo, in_reg_hi);
2588 __ Rev(out_reg_hi, in_reg_lo);
2589 }
2590
2591 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2592 CreateIntToIntLocations(allocator_, invoke);
2593 }
2594
2595 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2596 ArmVIXLAssembler* assembler = GetAssembler();
2597 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2598 }
2599
2600 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2601 DCHECK(DataType::IsIntOrLongType(type)) << type;
2602 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2603 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2604
2605 bool is_long = type == DataType::Type::kInt64;
2606 LocationSummary* locations = instr->GetLocations();
2607 Location in = locations->InAt(0);
2608 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2609 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2610 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2611 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2612 vixl32::Register out_r = OutputRegister(instr);
2613
2614 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2615 // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
2616  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
2617 // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2618 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2619 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2620 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2621 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2622 if (is_long) {
2623 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2624 }
2625 __ Vmov(out_r, tmp_s);
2626 }
2627
2628 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2629 CreateIntToIntLocations(allocator_, invoke);
2630 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2631 }
2632
2633 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2634 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2635 }
2636
2637 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2638 VisitIntegerBitCount(invoke);
2639 }
2640
2641 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2642 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2643 }
2644
2645 static void GenHighestOneBit(HInvoke* invoke,
2646 DataType::Type type,
2647 CodeGeneratorARMVIXL* codegen) {
2648 DCHECK(DataType::IsIntOrLongType(type));
2649
2650 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2651 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2652 const vixl32::Register temp = temps.Acquire();
2653
2654 if (type == DataType::Type::kInt64) {
2655 LocationSummary* locations = invoke->GetLocations();
2656 Location in = locations->InAt(0);
2657 Location out = locations->Out();
2658
2659 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2660 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2661 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2662 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2663
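    // For each half, 0x80000000 >> clz(half) isolates its highest set bit
    // (a register-controlled shift by 32 yields 0 when the half is 0).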
2664 __ Mov(temp, 0x80000000); // Modified immediate.
2665 __ Clz(out_reg_lo, in_reg_lo);
2666 __ Clz(out_reg_hi, in_reg_hi);
2667 __ Lsr(out_reg_lo, temp, out_reg_lo);
2668 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2669
2670 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2671 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2672 // we check that the output is in a low register, so that a 16-bit MOV
2673 // encoding can be used. If output is in a high register, then we generate
2674 // 4 more bytes of code to avoid a branch.
2675 Operand mov_src(0);
2676 if (!out_reg_lo.IsLow()) {
2677 __ Mov(LeaveFlags, temp, 0);
2678 mov_src = Operand(temp);
2679 }
2680 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2681 2 * vixl32::k16BitT32InstructionSizeInBytes,
2682 CodeBufferCheckScope::kExactSize);
2683 __ it(ne);
2684 __ mov(ne, out_reg_lo, mov_src);
2685 } else {
2686 vixl32::Register out = OutputRegister(invoke);
2687 vixl32::Register in = InputRegisterAt(invoke, 0);
2688
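    // out = 0x80000000 >> clz(in): the highest set bit of `in`, or 0 when `in` is 0.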
2689 __ Mov(temp, 0x80000000); // Modified immediate.
2690 __ Clz(out, in);
2691 __ Lsr(out, temp, out);
2692 }
2693 }
2694
2695 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2696 CreateIntToIntLocations(allocator_, invoke);
2697 }
2698
2699 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2700 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2701 }
2702
2703 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2704 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2705 }
2706
2707 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2708 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2709 }
2710
2711 static void GenLowestOneBit(HInvoke* invoke,
2712 DataType::Type type,
2713 CodeGeneratorARMVIXL* codegen) {
2714 DCHECK(DataType::IsIntOrLongType(type));
2715
2716 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2717 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2718 const vixl32::Register temp = temps.Acquire();
2719
2720 if (type == DataType::Type::kInt64) {
2721 LocationSummary* locations = invoke->GetLocations();
2722 Location in = locations->InAt(0);
2723 Location out = locations->Out();
2724
2725 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2726 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2727 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2728 vixl32::Register out_reg_hi = HighRegisterFrom(out);
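    // Isolate the lowest set bit of each half via (-x) & x.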
2729
2730 __ Rsb(out_reg_hi, in_reg_hi, 0);
2731 __ Rsb(out_reg_lo, in_reg_lo, 0);
2732 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2733 // The result of this operation is 0 iff in_reg_lo is 0
2734 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2735
2736 // Discard result for highest 32 bits if lowest 32 bits are not zero.
2737 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2738 // we check that the output is in a low register, so that a 16-bit MOV
2739 // encoding can be used. If output is in a high register, then we generate
2740 // 4 more bytes of code to avoid a branch.
2741 Operand mov_src(0);
2742 if (!out_reg_lo.IsLow()) {
2743 __ Mov(LeaveFlags, temp, 0);
2744 mov_src = Operand(temp);
2745 }
2746 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2747 2 * vixl32::k16BitT32InstructionSizeInBytes,
2748 CodeBufferCheckScope::kExactSize);
2749 __ it(ne);
2750 __ mov(ne, out_reg_hi, mov_src);
2751 } else {
2752 vixl32::Register out = OutputRegister(invoke);
2753 vixl32::Register in = InputRegisterAt(invoke, 0);
2754
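    // The lowest set bit of `in` is (-in) & in.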
2755 __ Rsb(temp, in, 0);
2756 __ And(out, temp, in);
2757 }
2758 }
2759
2760 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2761 CreateIntToIntLocations(allocator_, invoke);
2762 }
2763
2764 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2765 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2766 }
2767
2768 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2769 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2770 }
2771
2772 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2773 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2774 }
2775
2776 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2777 LocationSummary* locations =
2778 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2779 locations->SetInAt(0, Location::RequiresRegister());
2780 locations->SetInAt(1, Location::RequiresRegister());
2781 locations->SetInAt(2, Location::RequiresRegister());
2782 locations->SetInAt(3, Location::RequiresRegister());
2783 locations->SetInAt(4, Location::RequiresRegister());
2784
2785 // Temporary registers to store lengths of strings and for calculations.
2786 locations->AddTemp(Location::RequiresRegister());
2787 locations->AddTemp(Location::RequiresRegister());
2788 locations->AddTemp(Location::RequiresRegister());
2789 }
2790
2791 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2792 ArmVIXLAssembler* assembler = GetAssembler();
2793 LocationSummary* locations = invoke->GetLocations();
2794
2795 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2796 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2797 DCHECK_EQ(char_size, 2u);
2798
2799 // Location of data in char array buffer.
2800 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2801
2802 // Location of char array data in string.
2803 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2804
2805 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2806  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2807 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2808 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2809 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2810 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2811 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2812
2813 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2814 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2815 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2816
2817 vixl32::Label done, compressed_string_loop;
2818 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2819  // Compute the destination pointer: dst array data + dstBegin * 2 (chars are 2 bytes).
2820 __ Add(dst_ptr, dstObj, data_offset);
2821 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2822
2823 __ Subs(num_chr, srcEnd, srcBegin);
2824 // Early out for valid zero-length retrievals.
2825 __ B(eq, final_label, /* is_far_target= */ false);
2826
2827 // src range to copy.
2828 __ Add(src_ptr, srcObj, value_offset);
2829
2830 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2831 vixl32::Register temp;
2832 vixl32::Label compressed_string_preloop;
2833 if (mirror::kUseStringCompression) {
2834 // Location of count in string.
2835 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2836 temp = temps.Acquire();
2837  // Load the string's count field; bit 0 is the compression flag.
2838 __ Ldr(temp, MemOperand(srcObj, count_offset));
2839 __ Tst(temp, 1);
2840 temps.Release(temp);
2841 __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2842 }
2843 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2844
2845 // Do the copy.
2846 vixl32::Label loop, remainder;
2847
2848 temp = temps.Acquire();
2849 // Save repairing the value of num_chr on the < 4 character path.
2850 __ Subs(temp, num_chr, 4);
2851 __ B(lt, &remainder, /* is_far_target= */ false);
2852
2853  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
2854 __ Mov(num_chr, temp);
2855
2856 // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
2857 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2858 // to rectify these everywhere this intrinsic applies.)
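  // Each iteration copies two words: the word at offset 4 first, then the word at
  // offset 0 with a post-increment of 8 on both the source and destination pointers.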
2859 __ Bind(&loop);
2860 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2861 __ Subs(num_chr, num_chr, 4);
2862 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2863 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2864 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2865 temps.Release(temp);
2866 __ B(ge, &loop, /* is_far_target= */ false);
2867
2868 __ Adds(num_chr, num_chr, 4);
2869 __ B(eq, final_label, /* is_far_target= */ false);
2870
2871 // Main loop for < 4 character case and remainder handling. Loads and stores one
2872 // 16-bit Java character at a time.
2873 __ Bind(&remainder);
2874 temp = temps.Acquire();
2875 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2876 __ Subs(num_chr, num_chr, 1);
2877 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2878 temps.Release(temp);
2879 __ B(gt, &remainder, /* is_far_target= */ false);
2880
2881 if (mirror::kUseStringCompression) {
2882 __ B(final_label);
2883
2884 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2885 DCHECK_EQ(c_char_size, 1u);
2886  // Copy loop for compressed src, expanding one 8-bit character to 16 bits at a time.
2887 __ Bind(&compressed_string_preloop);
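  // Compressed chars are 8-bit, so srcBegin is not scaled here.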
2888 __ Add(src_ptr, src_ptr, srcBegin);
2889 __ Bind(&compressed_string_loop);
2890 temp = temps.Acquire();
2891 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2892 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2893 temps.Release(temp);
2894 __ Subs(num_chr, num_chr, 1);
2895 __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2896 }
2897
2898 if (done.IsReferenced()) {
2899 __ Bind(&done);
2900 }
2901 }
2902
2903 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2904 CreateFPToIntLocations(allocator_, invoke);
2905 }
2906
2907 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2908 ArmVIXLAssembler* const assembler = GetAssembler();
2909 const vixl32::Register out = OutputRegister(invoke);
2910 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2911 // we don't care about the sign bit anyway.
2912 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2913
2914 __ Vmov(out, InputSRegisterAt(invoke, 0));
2915 // We don't care about the sign bit, so shift left.
2916 __ Lsl(out, out, 1);
2917 __ Eor(out, out, infinity);
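  // `out` is now zero iff the input is +/- infinity.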
2918 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2919 }
2920
2921 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2922 CreateFPToIntLocations(allocator_, invoke);
2923 }
2924
2925 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2926 ArmVIXLAssembler* const assembler = GetAssembler();
2927 const vixl32::Register out = OutputRegister(invoke);
2928 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2929 const vixl32::Register temp = temps.Acquire();
2930 // The highest 32 bits of double precision positive infinity separated into
2931 // two constants encodable as immediate operands.
2932 constexpr uint32_t infinity_high = 0x7f000000U;
2933 constexpr uint32_t infinity_high2 = 0x00f00000U;
2934
2935 static_assert((infinity_high | infinity_high2) ==
2936 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2937 "The constants do not add up to the high 32 bits of double "
2938 "precision positive infinity.");
2939 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2940 __ Eor(out, out, infinity_high);
2941 __ Eor(out, out, infinity_high2);
2942 // We don't care about the sign bit, so shift left.
2943 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
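  // `out` is now zero iff the input is +/- infinity.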
2944 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2945 }
2946
2947 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2948 if (features_.HasARMv8AInstructions()) {
2949 CreateFPToFPLocations(allocator_, invoke);
2950 }
2951 }
2952
2953 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2954 ArmVIXLAssembler* assembler = GetAssembler();
2955 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2956 __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2957 }
2958
2959 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2960 if (features_.HasARMv8AInstructions()) {
2961 CreateFPToFPLocations(allocator_, invoke);
2962 }
2963 }
2964
2965 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2966 ArmVIXLAssembler* assembler = GetAssembler();
2967 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2968 __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2969 }
2970
2971 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2972 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2973 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2974 invoke,
2975 codegen_,
2976 LocationFrom(r0),
2977 LocationFrom(calling_convention.GetRegisterAt(0)));
2978 }
2979
2980 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2981 IntrinsicVisitor::IntegerValueOfInfo info =
2982 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2983 LocationSummary* locations = invoke->GetLocations();
2984 ArmVIXLAssembler* const assembler = GetAssembler();
2985
2986 vixl32::Register out = RegisterFrom(locations->Out());
2987 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2988 vixl32::Register temp = temps.Acquire();
2989 if (invoke->InputAt(0)->IsConstant()) {
2990 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2991 if (static_cast<uint32_t>(value - info.low) < info.length) {
2992 // Just embed the j.l.Integer in the code.
2993 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2994 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2995 } else {
2996 DCHECK(locations->CanCall());
2997 // Allocate and initialize a new j.l.Integer.
2998 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2999 // JIT object table.
3000 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3001 info.integer_boot_image_offset);
3002 __ Mov(temp, value);
3003 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3004 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3005 // one.
3006 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3007 }
3008 } else {
3009 DCHECK(locations->CanCall());
3010 vixl32::Register in = RegisterFrom(locations->InAt(0));
3011 // Check bounds of our cache.
3012 __ Add(out, in, -info.low);
3013 __ Cmp(out, info.length);
3014 vixl32::Label allocate, done;
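  // The unsigned `hs` comparison also catches inputs below `low` (they wrap around).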
3015 __ B(hs, &allocate, /* is_far_target= */ false);
3016 // If the value is within the bounds, load the j.l.Integer directly from the array.
3017 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3018 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
3019 assembler->MaybeUnpoisonHeapReference(out);
3020 __ B(&done);
3021 __ Bind(&allocate);
3022 // Otherwise allocate and initialize a new j.l.Integer.
3023 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3024 info.integer_boot_image_offset);
3025 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3026 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3027 // one.
3028 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3029 __ Bind(&done);
3030 }
3031 }
3032
3033 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3034 LocationSummary* locations =
3035 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3036 locations->SetOut(Location::RequiresRegister());
3037 }
3038
3039 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3040 ArmVIXLAssembler* assembler = GetAssembler();
3041 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
3042 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
3043 __ Ldr(out, MemOperand(tr, offset));
3044 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3045 vixl32::Register temp = temps.Acquire();
3046 vixl32::Label done;
3047 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
3048 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
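  // The interrupted flag was set: clear it, with a memory barrier on each side of the store.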
3049 __ Dmb(vixl32::ISH);
3050 __ Mov(temp, 0);
3051 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3052 __ Dmb(vixl32::ISH);
3053 if (done.IsReferenced()) {
3054 __ Bind(&done);
3055 }
3056 }
3057
3058 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
3059 LocationSummary* locations =
3060 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3061 locations->SetInAt(0, Location::Any());
3062 }
3063
3064 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3065
3066 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
3067 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
3068 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3069 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3070 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
3071 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
3072 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
3073 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
3074 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
3075 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
3076 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
3077 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
3078 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
3079 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
3080 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
3081 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
3082
3083 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3084 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3085 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3086 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3087 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3088 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
3089 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
3090 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
3091 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
3092 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
3093 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
3094 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
3095 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
3096 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
3097 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
3098 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3099 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3100
3101 // 1.8.
3102 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3103 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3104 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3105 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3106 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3107
3108 UNREACHABLE_INTRINSICS(ARMVIXL)
3109
3110 #undef __
3111
3112 } // namespace arm
3113 } // namespace art
3114