1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_arm_vixl.h"
18
19 #include "arch/arm/callee_save_frame_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "art_method.h"
22 #include "code_generator_arm_vixl.h"
23 #include "common_arm.h"
24 #include "heap_poisoning.h"
25 #include "intrinsics.h"
26 #include "intrinsics_utils.h"
27 #include "lock_word.h"
28 #include "mirror/array-inl.h"
29 #include "mirror/object_array-inl.h"
30 #include "mirror/reference.h"
31 #include "mirror/string-inl.h"
32 #include "scoped_thread_state_change-inl.h"
33 #include "thread-current-inl.h"
34
35 #include "aarch32/constants-aarch32.h"
36
37 namespace art {
38 namespace arm {
39
40 #define __ assembler->GetVIXLAssembler()->
41
42 using helpers::DRegisterFrom;
43 using helpers::HighRegisterFrom;
44 using helpers::InputDRegisterAt;
45 using helpers::InputRegisterAt;
46 using helpers::InputSRegisterAt;
47 using helpers::Int32ConstantFrom;
48 using helpers::LocationFrom;
49 using helpers::LowRegisterFrom;
50 using helpers::LowSRegisterFrom;
51 using helpers::HighSRegisterFrom;
52 using helpers::OutputDRegister;
53 using helpers::OutputRegister;
54 using helpers::RegisterFrom;
55 using helpers::SRegisterFrom;
56
57 using namespace vixl::aarch32; // NOLINT(build/namespaces)
58
59 using vixl::ExactAssemblyScope;
60 using vixl::CodeBufferCheckScope;
61
62 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
63 return codegen_->GetAssembler();
64 }
65
66 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
67 return codegen_->GetGraph()->GetAllocator();
68 }
69
70 using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
71 SlowPathCodeARMVIXL,
72 ArmVIXLAssembler>;
73
74 // Compute base address for the System.arraycopy intrinsic in `base`.
75 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
76 DataType::Type type,
77 const vixl32::Register& array,
78 const Location& pos,
79 const vixl32::Register& base) {
80 // This routine is currently only used by the SystemArrayCopy intrinsic, so `type`
81 // must be DataType::Type::kReference. It could be extended to other element types
82 // (e.g. DataType::Type::kUint16) to also implement the SystemArrayCopyChar intrinsic.
83 DCHECK_EQ(type, DataType::Type::kReference);
84 const int32_t element_size = DataType::Size(type);
85 const uint32_t element_size_shift = DataType::SizeShift(type);
86 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
87
88 if (pos.IsConstant()) {
89 int32_t constant = Int32ConstantFrom(pos);
90 __ Add(base, array, element_size * constant + data_offset);
91 } else {
92 __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
93 __ Add(base, base, data_offset);
94 }
95 }
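// Editor's note (illustrative, not from the original source): for a reference array on
// 32-bit ARM, element_size is 4 and the array data offset is typically 12 bytes (8-byte
// object header plus 4-byte length), so a constant pos of 2 yields base = array + 4 * 2 + 12.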
96
97 // Compute end address for the System.arraycopy intrinsic in `end`.
98 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
99 DataType::Type type,
100 const Location& copy_length,
101 const vixl32::Register& base,
102 const vixl32::Register& end) {
103 // This routine is currently only used by the SystemArrayCopy intrinsic, so `type`
104 // must be DataType::Type::kReference. It could be extended to other element types
105 // (e.g. DataType::Type::kUint16) to also implement the SystemArrayCopyChar intrinsic.
106 DCHECK_EQ(type, DataType::Type::kReference);
107 const int32_t element_size = DataType::Size(type);
108 const uint32_t element_size_shift = DataType::SizeShift(type);
109
110 if (copy_length.IsConstant()) {
111 int32_t constant = Int32ConstantFrom(copy_length);
112 __ Add(end, base, element_size * constant);
113 } else {
114 __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
115 }
116 }
117
118 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
119 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
120 public:
121 explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
122 : SlowPathCodeARMVIXL(instruction) {
123 DCHECK(kEmitCompilerReadBarrier);
124 DCHECK(kUseBakerReadBarrier);
125 }
126
127 void EmitNativeCode(CodeGenerator* codegen) override {
128 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
129 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
130 LocationSummary* locations = instruction_->GetLocations();
131 DCHECK(locations->CanCall());
132 DCHECK(instruction_->IsInvokeStaticOrDirect())
133 << "Unexpected instruction in read barrier arraycopy slow path: "
134 << instruction_->DebugName();
135 DCHECK(instruction_->GetLocations()->Intrinsified());
136 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
137
138 DataType::Type type = DataType::Type::kReference;
139 const int32_t element_size = DataType::Size(type);
140
141 vixl32::Register dest = InputRegisterAt(instruction_, 2);
142 Location dest_pos = locations->InAt(3);
143 vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
144 vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
145 vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
146 vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
147
148 __ Bind(GetEntryLabel());
149 // Compute the base destination address in `dst_curr_addr`.
150 GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
151
152 vixl32::Label loop;
153 __ Bind(&loop);
154 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
155 assembler->MaybeUnpoisonHeapReference(tmp);
156 // TODO: Inline the mark bit check before calling the runtime?
157 // tmp = ReadBarrier::Mark(tmp);
158 // No need to save live registers; it's taken care of by the
159 // entrypoint. Also, there is no need to update the stack mask,
160 // as this runtime call will not trigger a garbage collection.
161 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
162 // explanations.)
163 DCHECK(!tmp.IsSP());
164 DCHECK(!tmp.IsLR());
165 DCHECK(!tmp.IsPC());
166 // IP is used internally by the ReadBarrierMarkRegX entry point
167 // as a temporary (and not preserved). It thus cannot be used by
168 // any live register in this slow path.
169 DCHECK(!src_curr_addr.Is(ip));
170 DCHECK(!dst_curr_addr.Is(ip));
171 DCHECK(!src_stop_addr.Is(ip));
172 DCHECK(!tmp.Is(ip));
173 DCHECK(tmp.IsRegister()) << tmp;
174 // TODO: Load the entrypoint once before the loop, instead of
175 // loading it at every iteration.
176 int32_t entry_point_offset =
177 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
178 // This runtime call does not require a stack map.
179 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
180 assembler->MaybePoisonHeapReference(tmp);
181 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
182 __ Cmp(src_curr_addr, src_stop_addr);
183 __ B(ne, &loop, /* is_far_target= */ false);
184 __ B(GetExitLabel());
185 }
186
187 const char* GetDescription() const override {
188 return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
189 }
190
191 private:
192 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
193 };
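// Editor's note (summary, not from the original source): the slow-path loop above copies one
// reference per iteration. It loads from `src_curr_addr` with a post-indexed LDR, marks the
// reference through the Baker read barrier entrypoint selected by the register number of `tmp`,
// re-poisons it if heap poisoning is enabled, stores it to `dst_curr_addr` with a post-indexed
// STR, and repeats until `src_curr_addr` reaches `src_stop_addr`.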
194
195 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
196 : allocator_(codegen->GetGraph()->GetAllocator()),
197 codegen_(codegen),
198 assembler_(codegen->GetAssembler()),
199 features_(codegen->GetInstructionSetFeatures()) {}
200
201 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
202 Dispatch(invoke);
203 LocationSummary* res = invoke->GetLocations();
204 if (res == nullptr) {
205 return false;
206 }
207 return res->Intrinsified();
208 }
209
210 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
211 LocationSummary* locations =
212 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
213 locations->SetInAt(0, Location::RequiresFpuRegister());
214 locations->SetOut(Location::RequiresRegister());
215 }
216
217 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
218 LocationSummary* locations =
219 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
220 locations->SetInAt(0, Location::RequiresRegister());
221 locations->SetOut(Location::RequiresFpuRegister());
222 }
223
224 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
225 Location input = locations->InAt(0);
226 Location output = locations->Out();
227 if (is64bit) {
228 __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
229 } else {
230 __ Vmov(RegisterFrom(output), SRegisterFrom(input));
231 }
232 }
233
234 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
235 Location input = locations->InAt(0);
236 Location output = locations->Out();
237 if (is64bit) {
238 __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
239 } else {
240 __ Vmov(SRegisterFrom(output), RegisterFrom(input));
241 }
242 }
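// Editor's note (not from the original source): MoveFPToInt and MoveIntToFP implement the
// Double/Float raw-bits intrinsics below. They are pure register moves between core and FP
// registers; no value conversion or NaN canonicalization is performed, which matches the
// "raw" semantics of doubleToRawLongBits/floatToRawIntBits.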
243
244 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
245 CreateFPToIntLocations(allocator_, invoke);
246 }
247 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
248 CreateIntToFPLocations(allocator_, invoke);
249 }
250
251 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
252 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
253 }
254 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
255 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
256 }
257
258 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
259 CreateFPToIntLocations(allocator_, invoke);
260 }
261 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
262 CreateIntToFPLocations(allocator_, invoke);
263 }
264
265 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
266 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
267 }
268 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
269 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
270 }
271
272 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
273 LocationSummary* locations =
274 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
275 locations->SetInAt(0, Location::RequiresRegister());
276 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
277 }
278
279 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
280 LocationSummary* locations =
281 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
282 locations->SetInAt(0, Location::RequiresRegister());
283 locations->SetInAt(1, Location::RequiresRegister());
284 // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
285 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
286 }
287
288 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
289 LocationSummary* locations =
290 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
291 locations->SetInAt(0, Location::RequiresRegister());
292 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
293 }
294
295 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296 LocationSummary* locations =
297 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
298 locations->SetInAt(0, Location::RequiresFpuRegister());
299 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
300 }
301
302 static void GenNumberOfLeadingZeros(HInvoke* invoke,
303 DataType::Type type,
304 CodeGeneratorARMVIXL* codegen) {
305 ArmVIXLAssembler* assembler = codegen->GetAssembler();
306 LocationSummary* locations = invoke->GetLocations();
307 Location in = locations->InAt(0);
308 vixl32::Register out = RegisterFrom(locations->Out());
309
310 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
311
312 if (type == DataType::Type::kInt64) {
313 vixl32::Register in_reg_lo = LowRegisterFrom(in);
314 vixl32::Register in_reg_hi = HighRegisterFrom(in);
315 vixl32::Label end;
316 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
317 __ Clz(out, in_reg_hi);
318 __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
319 __ Clz(out, in_reg_lo);
320 __ Add(out, out, 32);
321 if (end.IsReferenced()) {
322 __ Bind(&end);
323 }
324 } else {
325 __ Clz(out, RegisterFrom(in));
326 }
327 }
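// Editor's note (worked example, not from the original source): for a 64-bit input with a
// non-zero high word the result is simply CLZ(high). If the high word is zero, the branch above
// is not taken and the result becomes CLZ(low) + 32; e.g. 0x0000000000000001 gives 31 + 32 = 63.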
328
329 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
330 CreateIntToIntLocations(allocator_, invoke);
331 }
332
333 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
334 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
335 }
336
337 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
338 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
339 }
340
341 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
342 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
343 }
344
345 static void GenNumberOfTrailingZeros(HInvoke* invoke,
346 DataType::Type type,
347 CodeGeneratorARMVIXL* codegen) {
348 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
349
350 ArmVIXLAssembler* assembler = codegen->GetAssembler();
351 LocationSummary* locations = invoke->GetLocations();
352 vixl32::Register out = RegisterFrom(locations->Out());
353
354 if (type == DataType::Type::kInt64) {
355 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
356 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
357 vixl32::Label end;
358 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
359 __ Rbit(out, in_reg_lo);
360 __ Clz(out, out);
361 __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
362 __ Rbit(out, in_reg_hi);
363 __ Clz(out, out);
364 __ Add(out, out, 32);
365 if (end.IsReferenced()) {
366 __ Bind(&end);
367 }
368 } else {
369 vixl32::Register in = RegisterFrom(locations->InAt(0));
370 __ Rbit(out, in);
371 __ Clz(out, out);
372 }
373 }
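// Editor's note (not from the original source): ARM has no count-trailing-zeros instruction, so
// trailing zeros are counted as CLZ(RBIT(x)). For example, x = 0x8 bit-reverses to 0x10000000,
// whose CLZ is 3, which is indeed the number of trailing zeros of 0x8.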
374
375 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
376 CreateIntToIntLocations(allocator_, invoke);
377 }
378
379 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
380 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
381 }
382
383 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
384 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
385 }
386
387 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
388 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
389 }
390
391 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
392 CreateFPToFPLocations(allocator_, invoke);
393 }
394
395 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
396 ArmVIXLAssembler* assembler = GetAssembler();
397 __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
398 }
399
400 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
401 if (features_.HasARMv8AInstructions()) {
402 CreateFPToFPLocations(allocator_, invoke);
403 }
404 }
405
406 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
407 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
408 ArmVIXLAssembler* assembler = GetAssembler();
409 __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
410 }
411
412 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
413 if (features_.HasARMv8AInstructions()) {
414 LocationSummary* locations =
415 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
416 locations->SetInAt(0, Location::RequiresFpuRegister());
417 locations->SetOut(Location::RequiresRegister());
418 locations->AddTemp(Location::RequiresFpuRegister());
419 }
420 }
421
422 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
423 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
424
425 ArmVIXLAssembler* assembler = GetAssembler();
426 vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
427 vixl32::Register out_reg = OutputRegister(invoke);
428 vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
429 vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
430 vixl32::Label done;
431 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
432
433 // Round to nearest integer, ties away from zero.
434 __ Vcvta(S32, F32, temp1, in_reg);
435 __ Vmov(out_reg, temp1);
436
437 // For positive, zero or NaN inputs, rounding is done.
438 __ Cmp(out_reg, 0);
439 __ B(ge, final_label, /* is_far_target= */ false);
440
441 // Handle input < 0 cases.
442 // If the input is negative but not a tie, the previous result (round to nearest) is valid.
443 // If the input is a negative tie, rounding must go toward positive infinity instead, so add 1 to out_reg.
444 __ Vrinta(F32, temp1, in_reg);
445 __ Vmov(temp2, 0.5);
446 __ Vsub(F32, temp1, in_reg, temp1);
447 __ Vcmp(F32, temp1, temp2);
448 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
449 {
450 // Use ExactAssemblyScope here because we are using IT.
451 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
452 2 * kMaxInstructionSizeInBytes,
453 CodeBufferCheckScope::kMaximumSize);
454 __ it(eq);
455 __ add(eq, out_reg, out_reg, 1);
456 }
457
458 if (done.IsReferenced()) {
459 __ Bind(&done);
460 }
461 }
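// Editor's note (worked example, not from the original source): VCVTA rounds to nearest with
// ties away from zero, while Math.round rounds ties toward positive infinity; the two only
// disagree on negative ties. E.g. for in_reg = -2.5f, VCVTA yields -3; the code then computes
// in_reg - vrinta(in_reg) = -2.5 - (-3.0) = 0.5, detects the tie, and adds 1, giving -2.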
462
463 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
464 CreateIntToIntLocations(allocator_, invoke);
465 }
466
467 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
468 ArmVIXLAssembler* assembler = GetAssembler();
469 // Ignore upper 4B of long address.
470 __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
471 }
472
473 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
474 CreateIntToIntLocations(allocator_, invoke);
475 }
476
477 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
478 ArmVIXLAssembler* assembler = GetAssembler();
479 // Ignore upper 4B of long address.
480 __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
481 }
482
483 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
484 CreateIntToIntLocations(allocator_, invoke);
485 }
486
487 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
488 ArmVIXLAssembler* assembler = GetAssembler();
489 // Ignore upper 4B of long address.
490 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
491 // LDRD requires a word-aligned address (an unaligned LDRD faults regardless of SCTLR.A),
492 // and `addr` may be unaligned, so load the two words with separate LDR instructions.
493 vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
494 vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
495 if (addr.Is(lo)) {
496 __ Ldr(hi, MemOperand(addr, 4));
497 __ Ldr(lo, MemOperand(addr));
498 } else {
499 __ Ldr(lo, MemOperand(addr));
500 __ Ldr(hi, MemOperand(addr, 4));
501 }
502 }
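// Editor's note (not from the original source): the load order above is chosen so that when the
// address register aliases the low half of the output, the high word is loaded first and the
// address is not clobbered before the second load.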
503
504 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
505 CreateIntToIntLocations(allocator_, invoke);
506 }
507
508 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
509 ArmVIXLAssembler* assembler = GetAssembler();
510 // Ignore upper 4B of long address.
511 __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
512 }
513
514 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
515 LocationSummary* locations =
516 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
517 locations->SetInAt(0, Location::RequiresRegister());
518 locations->SetInAt(1, Location::RequiresRegister());
519 }
520
521 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
522 CreateIntIntToVoidLocations(allocator_, invoke);
523 }
524
525 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
526 ArmVIXLAssembler* assembler = GetAssembler();
527 __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
528 }
529
530 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
531 CreateIntIntToVoidLocations(allocator_, invoke);
532 }
533
534 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
535 ArmVIXLAssembler* assembler = GetAssembler();
536 __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
537 }
538
539 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
540 CreateIntIntToVoidLocations(allocator_, invoke);
541 }
542
543 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
544 ArmVIXLAssembler* assembler = GetAssembler();
545 // Ignore upper 4B of long address.
546 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
547 // STRD requires a word-aligned address (an unaligned STRD faults regardless of SCTLR.A),
548 // and `addr` may be unaligned, so store the two words with separate STR instructions.
549 __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
550 __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
551 }
552
553 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
554 CreateIntIntToVoidLocations(allocator_, invoke);
555 }
556
557 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
558 ArmVIXLAssembler* assembler = GetAssembler();
559 __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
560 }
561
562 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
563 LocationSummary* locations =
564 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
565 locations->SetOut(Location::RequiresRegister());
566 }
567
568 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
569 ArmVIXLAssembler* assembler = GetAssembler();
570 __ Ldr(OutputRegister(invoke),
571 MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
572 }
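// Editor's note (not from the original source): `tr` is the dedicated ART thread register on
// 32-bit ARM (r9), so Thread.currentThread() is just a load of the Java peer object stored at
// Thread::PeerOffset within the native Thread structure.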
573
574 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
575 // The inputs plus temporaries.
576 LocationSummary* locations =
577 new (allocator_) LocationSummary(invoke,
578 invoke->InputAt(1)->CanBeNull()
579 ? LocationSummary::kCallOnSlowPath
580 : LocationSummary::kNoCall,
581 kIntrinsified);
582 locations->SetInAt(0, Location::RequiresRegister());
583 locations->SetInAt(1, Location::RequiresRegister());
584 locations->AddTemp(Location::RequiresRegister());
585 locations->AddTemp(Location::RequiresRegister());
586 locations->AddTemp(Location::RequiresRegister());
587 // Need an extra temporary register for the string compression feature.
588 if (mirror::kUseStringCompression) {
589 locations->AddTemp(Location::RequiresRegister());
590 }
591 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
592 }
593
594 // Forward declaration.
595 //
596 // The ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
597 // by the compiler for every C++ function, and if this function were inlined into
598 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit would be exceeded, resulting in a
599 // build failure. That is why the NO_INLINE attribute is used.
600 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
601 HInvoke* invoke,
602 vixl32::Label* end,
603 vixl32::Label* different_compression);
604
605 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
606 ArmVIXLAssembler* assembler = GetAssembler();
607 LocationSummary* locations = invoke->GetLocations();
608
609 const vixl32::Register str = InputRegisterAt(invoke, 0);
610 const vixl32::Register arg = InputRegisterAt(invoke, 1);
611 const vixl32::Register out = OutputRegister(invoke);
612
613 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
614 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
615 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
616 vixl32::Register temp3;
617 if (mirror::kUseStringCompression) {
618 temp3 = RegisterFrom(locations->GetTemp(3));
619 }
620
621 vixl32::Label end;
622 vixl32::Label different_compression;
623
624 // Get the offset of the count field within a string object.
625 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
626
627 // Note that the null check must have been done earlier.
628 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
629
630 // Take the slow path and throw if the argument can be null and actually is null.
631 SlowPathCodeARMVIXL* slow_path = nullptr;
632 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
633 if (can_slow_path) {
634 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
635 codegen_->AddSlowPath(slow_path);
636 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
637 }
638
639 // Reference equality check, return 0 if same reference.
640 __ Subs(out, str, arg);
641 __ B(eq, &end);
642
643 if (mirror::kUseStringCompression) {
644 // Load `count` fields of this and argument strings.
645 __ Ldr(temp3, MemOperand(str, count_offset));
646 __ Ldr(temp2, MemOperand(arg, count_offset));
647 // Extract lengths from the `count` fields.
648 __ Lsr(temp0, temp3, 1u);
649 __ Lsr(temp1, temp2, 1u);
650 } else {
651 // Load lengths of this and argument strings.
652 __ Ldr(temp0, MemOperand(str, count_offset));
653 __ Ldr(temp1, MemOperand(arg, count_offset));
654 }
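// Editor's note (not from the original source): with string compression, the `count` field
// packs (length << 1) | flag, where flag 0 means compressed (8-bit chars) and 1 means
// uncompressed (16-bit chars), as asserted further below; the LSR #1 above therefore extracts
// the character length, while the flag bit is compared and used later.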
655 // out = length diff.
656 __ Subs(out, temp0, temp1);
657 // temp0 = min(len(str), len(arg)).
658
659 {
660 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
661 2 * kMaxInstructionSizeInBytes,
662 CodeBufferCheckScope::kMaximumSize);
663
664 __ it(gt);
665 __ mov(gt, temp0, temp1);
666 }
667
668 // Shorter string is empty?
669 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
670 // which moves the &end label far away from this branch, so it is not 'CBZ-encodable'.
671 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
672
673 if (mirror::kUseStringCompression) {
674 // Check that both strings use the same compression style before using this comparison loop.
675 __ Eors(temp2, temp2, temp3);
676 __ Lsrs(temp2, temp2, 1u);
677 __ B(cs, &different_compression);
678 // For string compression, calculate the number of bytes to compare (not chars).
679 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
680 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
681
682 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
683 2 * kMaxInstructionSizeInBytes,
684 CodeBufferCheckScope::kMaximumSize);
685
686 __ it(ne);
687 __ add(ne, temp0, temp0, temp0);
688 }
689
690
691 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
692
693 __ Bind(&end);
694
695 if (can_slow_path) {
696 __ Bind(slow_path->GetExitLabel());
697 }
698 }
699
700 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
701 HInvoke* invoke,
702 vixl32::Label* end,
703 vixl32::Label* different_compression) {
704 LocationSummary* locations = invoke->GetLocations();
705
706 const vixl32::Register str = InputRegisterAt(invoke, 0);
707 const vixl32::Register arg = InputRegisterAt(invoke, 1);
708 const vixl32::Register out = OutputRegister(invoke);
709
710 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
711 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
712 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
713 vixl32::Register temp3;
714 if (mirror::kUseStringCompression) {
715 temp3 = RegisterFrom(locations->GetTemp(3));
716 }
717
718 vixl32::Label loop;
719 vixl32::Label find_char_diff;
720
721 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
722 // Store offset of string value in preparation for comparison loop.
723 __ Mov(temp1, value_offset);
724
725 // Assertions that must hold in order to compare multiple characters at a time.
726 CHECK_ALIGNED(value_offset, 8);
727 static_assert(IsAligned<8>(kObjectAlignment),
728 "String data must be 8-byte aligned for unrolled CompareTo loop.");
729
730 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
731 DCHECK_EQ(char_size, 2u);
732
733 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
734
735 vixl32::Label find_char_diff_2nd_cmp;
736 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
737 __ Bind(&loop);
738 vixl32::Register temp_reg = temps.Acquire();
739 __ Ldr(temp_reg, MemOperand(str, temp1));
740 __ Ldr(temp2, MemOperand(arg, temp1));
741 __ Cmp(temp_reg, temp2);
742 __ B(ne, &find_char_diff, /* is_far_target= */ false);
743 __ Add(temp1, temp1, char_size * 2);
744
745 __ Ldr(temp_reg, MemOperand(str, temp1));
746 __ Ldr(temp2, MemOperand(arg, temp1));
747 __ Cmp(temp_reg, temp2);
748 __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
749 __ Add(temp1, temp1, char_size * 2);
750 // With string compression, we have compared 8 bytes, otherwise 4 chars.
751 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
752 __ B(hi, &loop, /* is_far_target= */ false);
753 __ B(end);
754
755 __ Bind(&find_char_diff_2nd_cmp);
756 if (mirror::kUseStringCompression) {
757 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
758 __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end?
759 } else {
760 // Without string compression, we can start treating temp0 as signed
761 // and rely on the signed comparison below.
762 __ Sub(temp0, temp0, 2);
763 }
764
765 // Find the single character difference.
766 __ Bind(&find_char_diff);
767 // Get the bit position of the first character that differs.
768 __ Eor(temp1, temp2, temp_reg);
769 __ Rbit(temp1, temp1);
770 __ Clz(temp1, temp1);
771
772 // temp0 = number of characters remaining to compare.
773 // (Without string compression, it could be < 1 if a difference is found by the second CMP
774 // in the comparison loop, and after the end of the shorter string data).
775
776 // Without string compression (temp1 >> 4) = character where difference occurs between the last
777 // two words compared, in the interval [0,1].
778 // (0 for low half-word different, 1 for high half-word different).
779 // With string compression, (temp1 << 3) = byte where the difference occurs,
780 // in the interval [0,3].
781
782 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
783 // the remaining string data, so just return length diff (out).
784 // The comparison is unsigned for string compression, otherwise signed.
785 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
786 __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
787
788 // Extract the characters and calculate the difference.
789 if (mirror::kUseStringCompression) {
790 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
791 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
792 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
793 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
794 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
795 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
796 __ Lsr(temp2, temp2, temp1); // Extract second character.
797 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
798 __ Lsr(out, temp_reg, temp1); // Extract first character.
799 __ And(temp2, temp2, temp3);
800 __ And(out, out, temp3);
801 } else {
802 __ Bic(temp1, temp1, 0xf);
803 __ Lsr(temp2, temp2, temp1);
804 __ Lsr(out, temp_reg, temp1);
805 __ Movt(temp2, 0);
806 __ Movt(out, 0);
807 }
808
809 __ Sub(out, out, temp2);
810 temps.Release(temp_reg);
811
812 if (mirror::kUseStringCompression) {
813 __ B(end);
814 __ Bind(different_compression);
815
816 // Comparison for different compression style.
817 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
818 DCHECK_EQ(c_char_size, 1u);
819
820 // We want to free up temp3, currently holding `str.count`, for comparison.
821 // So, we move it to the bottom bit of the iteration count `temp0`, which we then
822 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
823 // further down with an LSRS+SBC, which will flip the meaning of the flag but allow
824 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
825 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
826 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
827 __ Mov(temp1, str);
828 __ Mov(temp2, arg);
829 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
830 {
831 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
832 3 * kMaxInstructionSizeInBytes,
833 CodeBufferCheckScope::kMaximumSize);
834 __ itt(cs); // Interleave with selection of temp1 and temp2.
835 __ mov(cs, temp1, arg); // Preserves flags.
836 __ mov(cs, temp2, str); // Preserves flags.
837 }
838 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
839
840 // Adjust temp1 and temp2 from string pointers to data pointers.
841 __ Add(temp1, temp1, value_offset);
842 __ Add(temp2, temp2, value_offset);
843
844 vixl32::Label different_compression_loop;
845 vixl32::Label different_compression_diff;
846
847 // Main loop for different compression.
848 temp_reg = temps.Acquire();
849 __ Bind(&different_compression_loop);
850 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
851 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
852 __ Cmp(temp_reg, temp3);
853 __ B(ne, &different_compression_diff, /* is_far_target= */ false);
854 __ Subs(temp0, temp0, 2);
855 __ B(hi, &different_compression_loop, /* is_far_target= */ false);
856 __ B(end);
857
858 // Calculate the difference.
859 __ Bind(&different_compression_diff);
860 __ Sub(out, temp_reg, temp3);
861 temps.Release(temp_reg);
862 // Flip the difference if `arg` is compressed.
863 // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
864 __ Lsrs(temp0, temp0, 1u);
865 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
866 "Expecting 0=compressed, 1=uncompressed");
867
868 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
869 2 * kMaxInstructionSizeInBytes,
870 CodeBufferCheckScope::kMaximumSize);
871 __ it(cc);
872 __ rsb(cc, out, out, 0);
873 }
874 }
875
876 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
877 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
878 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
879 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
880 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
881 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
882
883 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
884 if (candidate->IsLoadString()) {
885 HLoadString* load_string = candidate->AsLoadString();
886 const DexFile& dex_file = load_string->GetDexFile();
887 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
888 }
889 return nullptr;
890 }
891
892 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
893 LocationSummary* locations =
894 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
895 InvokeRuntimeCallingConventionARMVIXL calling_convention;
896 locations->SetInAt(0, Location::RequiresRegister());
897 locations->SetInAt(1, Location::RequiresRegister());
898
899 // Temporary registers to store lengths of strings and for calculations.
900 // Using the CBZ instruction requires a low register, so explicitly set a temp to be R0.
901 locations->AddTemp(LocationFrom(r0));
902
903 // For the generic implementation and for long const strings we need an extra temporary.
904 // We do not need it for short const strings, up to 4 bytes, see code generation below.
905 uint32_t const_string_length = 0u;
906 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
907 if (const_string == nullptr) {
908 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
909 }
910 bool is_compressed =
911 mirror::kUseStringCompression &&
912 const_string != nullptr &&
913 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
914 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
915 locations->AddTemp(Location::RequiresRegister());
916 }
917
918 // TODO: If the String.equals() is used only for an immediately following HIf, we can
919 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
920 // Then we shall need an extra temporary register instead of the output register.
921 locations->SetOut(Location::RequiresRegister());
922 }
923
924 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
925 ArmVIXLAssembler* assembler = GetAssembler();
926 LocationSummary* locations = invoke->GetLocations();
927
928 vixl32::Register str = InputRegisterAt(invoke, 0);
929 vixl32::Register arg = InputRegisterAt(invoke, 1);
930 vixl32::Register out = OutputRegister(invoke);
931
932 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
933
934 vixl32::Label loop;
935 vixl32::Label end;
936 vixl32::Label return_true;
937 vixl32::Label return_false;
938 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
939
940 // Get offsets of count, value, and class fields within a string object.
941 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
942 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
943 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
944
945 // Note that the null check must have been done earlier.
946 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
947
948 StringEqualsOptimizations optimizations(invoke);
949 if (!optimizations.GetArgumentNotNull()) {
950 // Check if input is null, return false if it is.
951 __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
952 }
953
954 // Reference equality check, return true if same reference.
955 __ Cmp(str, arg);
956 __ B(eq, &return_true, /* is_far_target= */ false);
957
958 if (!optimizations.GetArgumentIsString()) {
959 // Instanceof check for the argument by comparing class fields.
960 // All string objects must have the same type since String cannot be subclassed.
961 // Receiver must be a string object, so its class field is equal to all strings' class fields.
962 // If the argument is a string object, its class field must be equal to receiver's class field.
963 //
964 // As the String class is expected to be non-movable, we can read the class
965 // field from String.equals' arguments without read barriers.
966 AssertNonMovableStringClass();
967 // /* HeapReference<Class> */ temp = str->klass_
968 __ Ldr(temp, MemOperand(str, class_offset));
969 // /* HeapReference<Class> */ out = arg->klass_
970 __ Ldr(out, MemOperand(arg, class_offset));
971 // Also, because we use the previously loaded class references only in the
972 // following comparison, we don't need to unpoison them.
973 __ Cmp(temp, out);
974 __ B(ne, &return_false, /* is_far_target= */ false);
975 }
976
977 // Check if one of the inputs is a const string. Do not special-case both strings
978 // being const, such cases should be handled by constant folding if needed.
979 uint32_t const_string_length = 0u;
980 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
981 if (const_string == nullptr) {
982 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
983 if (const_string != nullptr) {
984 std::swap(str, arg); // Make sure the const string is in `str`.
985 }
986 }
987 bool is_compressed =
988 mirror::kUseStringCompression &&
989 const_string != nullptr &&
990 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
991
992 if (const_string != nullptr) {
993 // Load `count` field of the argument string and check if it matches the const string.
994 // Also compares the compression style; if it differs, return false.
995 __ Ldr(temp, MemOperand(arg, count_offset));
996 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
997 __ B(ne, &return_false, /* is_far_target= */ false);
998 } else {
999 // Load `count` fields of this and argument strings.
1000 __ Ldr(temp, MemOperand(str, count_offset));
1001 __ Ldr(out, MemOperand(arg, count_offset));
1002 // Check if `count` fields are equal, return false if they're not.
1003 // Also compares the compression style; if it differs, return false.
1004 __ Cmp(temp, out);
1005 __ B(ne, &return_false, /* is_far_target= */ false);
1006 }
1007
1008 // Assertions that must hold in order to compare strings 4 bytes at a time.
1009 // Ok to do this because strings are zero-padded to kObjectAlignment.
1010 DCHECK_ALIGNED(value_offset, 4);
1011 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1012
1013 if (const_string != nullptr &&
1014 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1015 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1016 // Load and compare the contents. Though we know the contents of the short const string
1017 // at compile time, materializing constants may be more code than loading from memory.
1018 int32_t offset = value_offset;
1019 size_t remaining_bytes =
1020 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1021 while (remaining_bytes > sizeof(uint32_t)) {
1022 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1023 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1024 vixl32::Register temp2 = scratch_scope.Acquire();
1025 __ Ldrd(temp, temp1, MemOperand(str, offset));
1026 __ Ldrd(temp2, out, MemOperand(arg, offset));
1027 __ Cmp(temp, temp2);
1028 __ B(ne, &return_false, /* is_far_target= */ false);
1029 __ Cmp(temp1, out);
1030 __ B(ne, &return_false, /* is_far_target= */ false);
1031 offset += 2u * sizeof(uint32_t);
1032 remaining_bytes -= 2u * sizeof(uint32_t);
1033 }
1034 if (remaining_bytes != 0u) {
1035 __ Ldr(temp, MemOperand(str, offset));
1036 __ Ldr(out, MemOperand(arg, offset));
1037 __ Cmp(temp, out);
1038 __ B(ne, &return_false, /* is_far_target= */ false);
1039 }
1040 } else {
1041 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1042 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1043 "Expecting 0=compressed, 1=uncompressed");
1044 __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1045
1046 if (mirror::kUseStringCompression) {
1047 // For string compression, calculate the number of bytes to compare (not chars).
1048 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1049 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1050 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1051 2 * kMaxInstructionSizeInBytes,
1052 CodeBufferCheckScope::kMaximumSize);
1053 __ it(cs); // If uncompressed,
1054 __ add(cs, temp, temp, temp); // double the byte count.
1055 }
1056
1057 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1058 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1059 vixl32::Register temp2 = scratch_scope.Acquire();
1060
1061 // Store offset of string value in preparation for comparison loop.
1062 __ Mov(temp1, value_offset);
1063
1064 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1065 __ Bind(&loop);
1066 __ Ldr(out, MemOperand(str, temp1));
1067 __ Ldr(temp2, MemOperand(arg, temp1));
1068 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1069 __ Cmp(out, temp2);
1070 __ B(ne, &return_false, /* is_far_target= */ false);
1071 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1072 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1073 __ B(hi, &loop, /* is_far_target= */ false);
1074 }
1075
1076 // Return true and exit the function.
1077 // If loop does not result in returning false, we return true.
1078 __ Bind(&return_true);
1079 __ Mov(out, 1);
1080 __ B(final_label);
1081
1082 // Return false and exit the function.
1083 __ Bind(&return_false);
1084 __ Mov(out, 0);
1085
1086 if (end.IsReferenced()) {
1087 __ Bind(&end);
1088 }
1089 }
1090
1091 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1092 ArmVIXLAssembler* assembler,
1093 CodeGeneratorARMVIXL* codegen,
1094 bool start_at_zero) {
1095 LocationSummary* locations = invoke->GetLocations();
1096
1097 // Note that the null check must have been done earlier.
1098 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1099
1100 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1101 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1102 SlowPathCodeARMVIXL* slow_path = nullptr;
1103 HInstruction* code_point = invoke->InputAt(1);
1104 if (code_point->IsIntConstant()) {
1105 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1106 std::numeric_limits<uint16_t>::max()) {
1107 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1108 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1109 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1110 codegen->AddSlowPath(slow_path);
1111 __ B(slow_path->GetEntryLabel());
1112 __ Bind(slow_path->GetExitLabel());
1113 return;
1114 }
1115 } else if (code_point->GetType() != DataType::Type::kUint16) {
1116 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1117 // 0xffff cannot be encoded as a modified immediate but 0x10000 can, so use `>= 0x10000` instead of `> 0xffff`.
1118 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1119 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1120 codegen->AddSlowPath(slow_path);
1121 __ B(hs, slow_path->GetEntryLabel());
1122 }
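// Editor's note (not from the original source): code points above 0xFFFF cannot be represented
// as a single char, so those searches are sent to the intrinsic slow path, which re-invokes the
// original String.indexOf call instead of the char-based kQuickIndexOf entrypoint used below.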
1123
1124 if (start_at_zero) {
1125 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1126 DCHECK(tmp_reg.Is(r2));
1127 // Start-index = 0.
1128 __ Mov(tmp_reg, 0);
1129 }
1130
1131 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1132 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1133
1134 if (slow_path != nullptr) {
1135 __ Bind(slow_path->GetExitLabel());
1136 }
1137 }
1138
1139 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1140 LocationSummary* locations = new (allocator_) LocationSummary(
1141 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1142 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1143 // best to align the inputs accordingly.
1144 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1145 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1146 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1147 locations->SetOut(LocationFrom(r0));
1148
1149 // Need to send start-index=0.
1150 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1151 }
1152
1153 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1154 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1155 }
1156
1157 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1158 LocationSummary* locations = new (allocator_) LocationSummary(
1159 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1160 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1161 // best to align the inputs accordingly.
1162 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1163 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1164 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1165 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1166 locations->SetOut(LocationFrom(r0));
1167 }
1168
1169 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1170 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1171 }
1172
1173 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1174 LocationSummary* locations = new (allocator_) LocationSummary(
1175 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1176 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1177 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1178 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1179 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1180 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1181 locations->SetOut(LocationFrom(r0));
1182 }
1183
1184 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1185 ArmVIXLAssembler* assembler = GetAssembler();
1186 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1187 __ Cmp(byte_array, 0);
1188 SlowPathCodeARMVIXL* slow_path =
1189 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1190 codegen_->AddSlowPath(slow_path);
1191 __ B(eq, slow_path->GetEntryLabel());
1192
1193 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1194 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1195 __ Bind(slow_path->GetExitLabel());
1196 }
1197
1198 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1199 LocationSummary* locations =
1200 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1201 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1202 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1203 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1204 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1205 locations->SetOut(LocationFrom(r0));
1206 }
1207
1208 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1209 // No need to emit code checking whether `locations->InAt(2)` is a null
1210 // pointer, as callers of the native method
1211 //
1212 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1213 //
1214 // all include a null check on `data` before calling that method.
1215 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1216 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1217 }
1218
VisitStringNewStringFromString(HInvoke * invoke)1219 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1220 LocationSummary* locations = new (allocator_) LocationSummary(
1221 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1222 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1223 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1224 locations->SetOut(LocationFrom(r0));
1225 }
1226
VisitStringNewStringFromString(HInvoke * invoke)1227 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1228 ArmVIXLAssembler* assembler = GetAssembler();
1229 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1230 __ Cmp(string_to_copy, 0);
1231 SlowPathCodeARMVIXL* slow_path =
1232 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1233 codegen_->AddSlowPath(slow_path);
1234 __ B(eq, slow_path->GetEntryLabel());
1235
1236 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1237 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1238
1239 __ Bind(slow_path->GetExitLabel());
1240 }
1241
VisitSystemArrayCopy(HInvoke * invoke)1242 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1243 // The only read barrier implementation supporting the
1244 // SystemArrayCopy intrinsic is the Baker-style read barrier.
1245 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1246 return;
1247 }
1248
1249 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1250 LocationSummary* locations = invoke->GetLocations();
1251 if (locations == nullptr) {
1252 return;
1253 }
1254
1255 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1256 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1257 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1258
1259 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1260 locations->SetInAt(1, Location::RequiresRegister());
1261 }
1262 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1263 locations->SetInAt(3, Location::RequiresRegister());
1264 }
1265 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1266 locations->SetInAt(4, Location::RequiresRegister());
1267 }
1268 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1269 // Temporary register IP cannot be used in
1270 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1271 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1272 // temporary register from the register allocator.
1273 locations->AddTemp(Location::RequiresRegister());
1274 }
1275 }
1276
CheckPosition(ArmVIXLAssembler * assembler,Location pos,vixl32::Register input,Location length,SlowPathCodeARMVIXL * slow_path,vixl32::Register temp,bool length_is_input_length=false)1277 static void CheckPosition(ArmVIXLAssembler* assembler,
1278 Location pos,
1279 vixl32::Register input,
1280 Location length,
1281 SlowPathCodeARMVIXL* slow_path,
1282 vixl32::Register temp,
1283 bool length_is_input_length = false) {
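// A rough sketch of the checks emitted below, in pseudocode rather than generated code,
// for the general case where `length_is_input_length` is false:
//
//   if (pos < 0 || pos > input->length || input->length - pos < length) {
//     goto slow_path;
//   }
//
// When `length_is_input_length` is true, the copy can only succeed if `pos` is zero.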
1284 // Where is the length in the Array?
1285 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1286
1287 if (pos.IsConstant()) {
1288 int32_t pos_const = Int32ConstantFrom(pos);
1289 if (pos_const == 0) {
1290 if (!length_is_input_length) {
1291 // Check that length(input) >= length.
1292 __ Ldr(temp, MemOperand(input, length_offset));
1293 if (length.IsConstant()) {
1294 __ Cmp(temp, Int32ConstantFrom(length));
1295 } else {
1296 __ Cmp(temp, RegisterFrom(length));
1297 }
1298 __ B(lt, slow_path->GetEntryLabel());
1299 }
1300 } else {
1301 // Check that length(input) >= pos.
1302 __ Ldr(temp, MemOperand(input, length_offset));
1303 __ Subs(temp, temp, pos_const);
1304 __ B(lt, slow_path->GetEntryLabel());
1305
1306 // Check that (length(input) - pos) >= length.
1307 if (length.IsConstant()) {
1308 __ Cmp(temp, Int32ConstantFrom(length));
1309 } else {
1310 __ Cmp(temp, RegisterFrom(length));
1311 }
1312 __ B(lt, slow_path->GetEntryLabel());
1313 }
1314 } else if (length_is_input_length) {
1315 // The only way the copy can succeed is if pos is zero.
1316 vixl32::Register pos_reg = RegisterFrom(pos);
1317 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1318 } else {
1319 // Check that pos >= 0.
1320 vixl32::Register pos_reg = RegisterFrom(pos);
1321 __ Cmp(pos_reg, 0);
1322 __ B(lt, slow_path->GetEntryLabel());
1323
1324 // Check that pos <= length(input).
1325 __ Ldr(temp, MemOperand(input, length_offset));
1326 __ Subs(temp, temp, pos_reg);
1327 __ B(lt, slow_path->GetEntryLabel());
1328
1329 // Check that (length(input) - pos) >= length.
1330 if (length.IsConstant()) {
1331 __ Cmp(temp, Int32ConstantFrom(length));
1332 } else {
1333 __ Cmp(temp, RegisterFrom(length));
1334 }
1335 __ B(lt, slow_path->GetEntryLabel());
1336 }
1337 }
1338
VisitSystemArrayCopy(HInvoke * invoke)1339 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1340 // The only read barrier implementation supporting the
1341 // SystemArrayCopy intrinsic is the Baker-style read barrier.
1342 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1343
1344 ArmVIXLAssembler* assembler = GetAssembler();
1345 LocationSummary* locations = invoke->GetLocations();
1346
1347 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1348 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1349 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1350 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1351 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1352
1353 vixl32::Register src = InputRegisterAt(invoke, 0);
1354 Location src_pos = locations->InAt(1);
1355 vixl32::Register dest = InputRegisterAt(invoke, 2);
1356 Location dest_pos = locations->InAt(3);
1357 Location length = locations->InAt(4);
1358 Location temp1_loc = locations->GetTemp(0);
1359 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1360 Location temp2_loc = locations->GetTemp(1);
1361 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1362 Location temp3_loc = locations->GetTemp(2);
1363 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1364
1365 SlowPathCodeARMVIXL* intrinsic_slow_path =
1366 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1367 codegen_->AddSlowPath(intrinsic_slow_path);
1368
1369 vixl32::Label conditions_on_positions_validated;
1370 SystemArrayCopyOptimizations optimizations(invoke);
1371
1372 // If source and destination are the same, we go to the slow path if we need to do
1373 // forward copying.
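// Roughly, in pseudocode rather than generated code:
//
//   if (src == dest && src_pos < dest_pos) {
//     goto slow_path;  // Regions may overlap; a backward copy would be required.
//   }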
1374 if (src_pos.IsConstant()) {
1375 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1376 if (dest_pos.IsConstant()) {
1377 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1378 if (optimizations.GetDestinationIsSource()) {
1379 // Checked when building locations.
1380 DCHECK_GE(src_pos_constant, dest_pos_constant);
1381 } else if (src_pos_constant < dest_pos_constant) {
1382 __ Cmp(src, dest);
1383 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1384 }
1385
1386 // Checked when building locations.
1387 DCHECK(!optimizations.GetDestinationIsSource()
1388 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1389 } else {
1390 if (!optimizations.GetDestinationIsSource()) {
1391 __ Cmp(src, dest);
1392 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1393 }
1394 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1395 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1396 }
1397 } else {
1398 if (!optimizations.GetDestinationIsSource()) {
1399 __ Cmp(src, dest);
1400 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1401 }
1402 if (dest_pos.IsConstant()) {
1403 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1404 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1405 } else {
1406 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1407 }
1408 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1409 }
1410
1411 __ Bind(&conditions_on_positions_validated);
1412
1413 if (!optimizations.GetSourceIsNotNull()) {
1414 // Bail out if the source is null.
1415 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1416 }
1417
1418 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1419 // Bail out if the destination is null.
1420 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1421 }
1422
1423 // If the length is negative, bail out.
1424 // We have already checked in the LocationsBuilder for the constant case.
1425 if (!length.IsConstant() &&
1426 !optimizations.GetCountIsSourceLength() &&
1427 !optimizations.GetCountIsDestinationLength()) {
1428 __ Cmp(RegisterFrom(length), 0);
1429 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1430 }
1431
1432 // Validity checks: source.
1433 CheckPosition(assembler,
1434 src_pos,
1435 src,
1436 length,
1437 intrinsic_slow_path,
1438 temp1,
1439 optimizations.GetCountIsSourceLength());
1440
1441 // Validity checks: dest.
1442 CheckPosition(assembler,
1443 dest_pos,
1444 dest,
1445 length,
1446 intrinsic_slow_path,
1447 temp1,
1448 optimizations.GetCountIsDestinationLength());
1449
1450 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1451 // Check whether all elements of the source array are assignable to the component
1452 // type of the destination array. We do two checks: either the classes are the same,
1453 // or the destination is Object[]. If neither check succeeds, we go to the
1454 // slow path.
1455
1456 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1457 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1458 // /* HeapReference<Class> */ temp1 = src->klass_
1459 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1460 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1461 // Bail out if the source is not a non-primitive array.
1462 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1463 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1464 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1465 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1466 // If heap poisoning is enabled, `temp1` has been unpoisoned
1467 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1468 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1469 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1470 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1471 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1472 }
1473
1474 // /* HeapReference<Class> */ temp1 = dest->klass_
1475 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1476 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1477
1478 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1479 // Bail out if the destination is not a non-primitive array.
1480 //
1481 // Register `temp1` is not trashed by the read barrier emitted
1482 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1483 // method produces a call to a ReadBarrierMarkRegX entry point,
1484 // which saves all potentially live registers, including
1485 // temporaries such as `temp1`.
1486 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1487 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1488 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
1489 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
1490 // If heap poisoning is enabled, `temp2` has been unpoisoned
1491 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1492 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1493 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1494 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1495 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
1496 }
1497
1498 // For the same reason given earlier, `temp1` is not trashed by the
1499 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1500 // /* HeapReference<Class> */ temp2 = src->klass_
1501 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1502 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1503 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1504 __ Cmp(temp1, temp2);
1505
1506 if (optimizations.GetDestinationIsTypedObjectArray()) {
1507 vixl32::Label do_copy;
1508 __ B(eq, &do_copy, /* is_far_target= */ false);
1509 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1510 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1511 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1512 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1513 // We do not need to emit a read barrier for the following
1514 // heap reference load, as `temp1` is only used in a
1515 // comparison with null below, and this reference is not
1516 // kept afterwards.
1517 __ Ldr(temp1, MemOperand(temp1, super_offset));
1518 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1519 __ Bind(&do_copy);
1520 } else {
1521 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1522 }
1523 } else {
1524 // Non read barrier code.
1525
1526 // /* HeapReference<Class> */ temp1 = dest->klass_
1527 __ Ldr(temp1, MemOperand(dest, class_offset));
1528 // /* HeapReference<Class> */ temp2 = src->klass_
1529 __ Ldr(temp2, MemOperand(src, class_offset));
1530 bool did_unpoison = false;
1531 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1532 !optimizations.GetSourceIsNonPrimitiveArray()) {
1533 // One or two of the references need to be unpoisoned. Unpoison them
1534 // both to make the identity check valid.
1535 assembler->MaybeUnpoisonHeapReference(temp1);
1536 assembler->MaybeUnpoisonHeapReference(temp2);
1537 did_unpoison = true;
1538 }
1539
1540 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1541 // Bail out if the destination is not a non-primitive array.
1542 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1543 __ Ldr(temp3, MemOperand(temp1, component_offset));
1544 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1545 assembler->MaybeUnpoisonHeapReference(temp3);
1546 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1547 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1548 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1549 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1550 }
1551
1552 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1553 // Bail out if the source is not a non-primitive array.
1554 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1555 __ Ldr(temp3, MemOperand(temp2, component_offset));
1556 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1557 assembler->MaybeUnpoisonHeapReference(temp3);
1558 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1559 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1560 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1561 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1562 }
1563
1564 __ Cmp(temp1, temp2);
1565
1566 if (optimizations.GetDestinationIsTypedObjectArray()) {
1567 vixl32::Label do_copy;
1568 __ B(eq, &do_copy, /* is_far_target= */ false);
1569 if (!did_unpoison) {
1570 assembler->MaybeUnpoisonHeapReference(temp1);
1571 }
1572 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1573 __ Ldr(temp1, MemOperand(temp1, component_offset));
1574 assembler->MaybeUnpoisonHeapReference(temp1);
1575 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1576 __ Ldr(temp1, MemOperand(temp1, super_offset));
1577 // No need to unpoison the result, we're comparing against null.
1578 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1579 __ Bind(&do_copy);
1580 } else {
1581 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1582 }
1583 }
1584 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1585 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1586 // Bail out if the source is not a non-primitive array.
1587 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1588 // /* HeapReference<Class> */ temp1 = src->klass_
1589 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1590 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1591 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1592 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1593 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1594 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1595 // If heap poisoning is enabled, `temp3` has been unpoisoned
1596 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1597 } else {
1598 // /* HeapReference<Class> */ temp1 = src->klass_
1599 __ Ldr(temp1, MemOperand(src, class_offset));
1600 assembler->MaybeUnpoisonHeapReference(temp1);
1601 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1602 __ Ldr(temp3, MemOperand(temp1, component_offset));
1603 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1604 assembler->MaybeUnpoisonHeapReference(temp3);
1605 }
1606 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1607 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1608 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1609 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1610 }
1611
1612 if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1613 // Zero constant length: no need to emit the loop code at all.
1614 } else {
1615 vixl32::Label done;
1616 const DataType::Type type = DataType::Type::kReference;
1617 const int32_t element_size = DataType::Size(type);
1618
1619 if (length.IsRegister()) {
1620 // Don't enter the copy loop if the length is zero.
1621 __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
1622 }
1623
1624 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1625 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1626
1627 // SystemArrayCopy implementation for Baker read barriers (see
1628 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1629 //
1630 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1631 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1632 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1633 // if (is_gray) {
1634 // // Slow-path copy.
1635 // do {
1636 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1637 // } while (src_ptr != end_ptr)
1638 // } else {
1639 // // Fast-path copy.
1640 // do {
1641 // *dest_ptr++ = *src_ptr++;
1642 // } while (src_ptr != end_ptr)
1643 // }
1644
1645 // /* int32_t */ monitor = src->monitor_
1646 __ Ldr(temp2, MemOperand(src, monitor_offset));
1647 // /* LockWord */ lock_word = LockWord(monitor)
1648 static_assert(sizeof(LockWord) == sizeof(int32_t),
1649 "art::LockWord and int32_t have different sizes.");
1650
1651 // Introduce a dependency on the lock_word including the rb_state,
1652 // which shall prevent load-load reordering without using
1653 // a memory barrier (which would be more expensive).
1654 // `src` is unchanged by this operation, but its value now depends
1655 // on `temp2`.
1656 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
1657
1658 // Compute the base source address in `temp1`.
1659 // Note that `temp1` (the base source address) is computed from
1660 // `src` (and `src_pos`) here, and thus honors the artificial
1661 // dependency of `src` on `temp2`.
1662 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1663 // Compute the end source address in `temp3`.
1664 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1665 // The base destination address is computed later, as `temp2` is
1666 // used for intermediate computations.
1667
1668 // Slow path used to copy array when `src` is gray.
1669 // Note that the base destination address is computed in `temp2`
1670 // by the slow path code.
1671 SlowPathCodeARMVIXL* read_barrier_slow_path =
1672 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1673 codegen_->AddSlowPath(read_barrier_slow_path);
1674
1675 // Given the numeric representation, it's enough to check the low bit of the
1676 // rb_state. We do that by shifting the bit out of the lock word with LSRS,
1677 // which can be a 16-bit instruction, unlike the TST immediate.
1678 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1679 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1680 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
1681 // Carry flag is the last bit shifted out by LSRS.
1682 __ B(cs, read_barrier_slow_path->GetEntryLabel());
1683
1684 // Fast-path copy.
1685 // Compute the base destination address in `temp2`.
1686 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1687 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1688 // poison/unpoison.
1689 vixl32::Label loop;
1690 __ Bind(&loop);
1691 {
1692 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1693 const vixl32::Register temp_reg = temps.Acquire();
1694 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1695 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1696 }
1697 __ Cmp(temp1, temp3);
1698 __ B(ne, &loop, /* is_far_target= */ false);
1699
1700 __ Bind(read_barrier_slow_path->GetExitLabel());
1701 } else {
1702 // Non read barrier code.
1703 // Compute the base source address in `temp1`.
1704 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1705 // Compute the base destination address in `temp2`.
1706 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1707 // Compute the end source address in `temp3`.
1708 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1709 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1710 // poison/unpoison.
1711 vixl32::Label loop;
1712 __ Bind(&loop);
1713 {
1714 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1715 const vixl32::Register temp_reg = temps.Acquire();
1716 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1717 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1718 }
1719 __ Cmp(temp1, temp3);
1720 __ B(ne, &loop, /* is_far_target= */ false);
1721 }
1722 __ Bind(&done);
1723 }
1724
1725 // We only need one card marking on the destination array.
1726 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false);
1727
1728 __ Bind(intrinsic_slow_path->GetExitLabel());
1729 }
1730
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1731 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1732 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1733 // the code generator. Furthermore, the register allocator creates fixed live intervals
1734 // for all caller-saved registers because we are doing a function call. As a result, if
1735 // the input and output locations are unallocated, the register allocator runs out of
1736 // registers and fails; however, a debuggable graph is not the common case.
1737 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1738 return;
1739 }
1740
1741 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1742 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1743 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1744
1745 LocationSummary* const locations =
1746 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1747 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1748
1749 locations->SetInAt(0, Location::RequiresFpuRegister());
1750 locations->SetOut(Location::RequiresFpuRegister());
1751 // Native code uses the soft float ABI.
1752 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1753 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1754 }
1755
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1756 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1757 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1758 // the code generator. Furthermore, the register allocator creates fixed live intervals
1759 // for all caller-saved registers because we are doing a function call. As a result, if
1760 // the input and output locations are unallocated, the register allocator runs out of
1761 // registers and fails; however, a debuggable graph is not the common case.
1762 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1763 return;
1764 }
1765
1766 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1767 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1768 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1769 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1770
1771 LocationSummary* const locations =
1772 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1773 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1774
1775 locations->SetInAt(0, Location::RequiresFpuRegister());
1776 locations->SetInAt(1, Location::RequiresFpuRegister());
1777 locations->SetOut(Location::RequiresFpuRegister());
1778 // Native code uses the soft float ABI.
1779 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1780 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1781 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1782 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1783 }
1784
GenFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1785 static void GenFPToFPCall(HInvoke* invoke,
1786 ArmVIXLAssembler* assembler,
1787 CodeGeneratorARMVIXL* codegen,
1788 QuickEntrypointEnum entry) {
1789 LocationSummary* const locations = invoke->GetLocations();
1790
1791 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1792 DCHECK(locations->WillCall() && locations->Intrinsified());
1793
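// The quick entrypoints follow the soft-float convention, so the D-register argument is
// moved into a core register pair before the call and the result is moved back afterwards.
// Roughly, in pseudocode rather than generated code:
//
//   (r0, r1) = raw bits of the double argument;
//   call entry;                      // e.g. cos, sin, ...
//   output D-register = raw bits from (r0, r1);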
1794 // Native code uses the soft float ABI.
1795 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1796 RegisterFrom(locations->GetTemp(1)),
1797 InputDRegisterAt(invoke, 0));
1798 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1799 __ Vmov(OutputDRegister(invoke),
1800 RegisterFrom(locations->GetTemp(0)),
1801 RegisterFrom(locations->GetTemp(1)));
1802 }
1803
GenFPFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1804 static void GenFPFPToFPCall(HInvoke* invoke,
1805 ArmVIXLAssembler* assembler,
1806 CodeGeneratorARMVIXL* codegen,
1807 QuickEntrypointEnum entry) {
1808 LocationSummary* const locations = invoke->GetLocations();
1809
1810 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1811 DCHECK(locations->WillCall() && locations->Intrinsified());
1812
1813 // Native code uses the soft float ABI.
1814 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1815 RegisterFrom(locations->GetTemp(1)),
1816 InputDRegisterAt(invoke, 0));
1817 __ Vmov(RegisterFrom(locations->GetTemp(2)),
1818 RegisterFrom(locations->GetTemp(3)),
1819 InputDRegisterAt(invoke, 1));
1820 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1821 __ Vmov(OutputDRegister(invoke),
1822 RegisterFrom(locations->GetTemp(0)),
1823 RegisterFrom(locations->GetTemp(1)));
1824 }
1825
VisitMathCos(HInvoke * invoke)1826 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1827 CreateFPToFPCallLocations(allocator_, invoke);
1828 }
1829
VisitMathCos(HInvoke * invoke)1830 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1831 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1832 }
1833
VisitMathSin(HInvoke * invoke)1834 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1835 CreateFPToFPCallLocations(allocator_, invoke);
1836 }
1837
VisitMathSin(HInvoke * invoke)1838 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1839 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1840 }
1841
VisitMathAcos(HInvoke * invoke)1842 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1843 CreateFPToFPCallLocations(allocator_, invoke);
1844 }
1845
VisitMathAcos(HInvoke * invoke)1846 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1847 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1848 }
1849
VisitMathAsin(HInvoke * invoke)1850 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1851 CreateFPToFPCallLocations(allocator_, invoke);
1852 }
1853
VisitMathAsin(HInvoke * invoke)1854 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1855 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1856 }
1857
VisitMathAtan(HInvoke * invoke)1858 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1859 CreateFPToFPCallLocations(allocator_, invoke);
1860 }
1861
VisitMathAtan(HInvoke * invoke)1862 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1863 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1864 }
1865
VisitMathCbrt(HInvoke * invoke)1866 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1867 CreateFPToFPCallLocations(allocator_, invoke);
1868 }
1869
VisitMathCbrt(HInvoke * invoke)1870 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1871 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1872 }
1873
VisitMathCosh(HInvoke * invoke)1874 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1875 CreateFPToFPCallLocations(allocator_, invoke);
1876 }
1877
VisitMathCosh(HInvoke * invoke)1878 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1879 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1880 }
1881
VisitMathExp(HInvoke * invoke)1882 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1883 CreateFPToFPCallLocations(allocator_, invoke);
1884 }
1885
VisitMathExp(HInvoke * invoke)1886 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1887 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1888 }
1889
VisitMathExpm1(HInvoke * invoke)1890 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1891 CreateFPToFPCallLocations(allocator_, invoke);
1892 }
1893
VisitMathExpm1(HInvoke * invoke)1894 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1895 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1896 }
1897
VisitMathLog(HInvoke * invoke)1898 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1899 CreateFPToFPCallLocations(allocator_, invoke);
1900 }
1901
VisitMathLog(HInvoke * invoke)1902 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1903 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1904 }
1905
VisitMathLog10(HInvoke * invoke)1906 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1907 CreateFPToFPCallLocations(allocator_, invoke);
1908 }
1909
VisitMathLog10(HInvoke * invoke)1910 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1911 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1912 }
1913
VisitMathSinh(HInvoke * invoke)1914 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1915 CreateFPToFPCallLocations(allocator_, invoke);
1916 }
1917
VisitMathSinh(HInvoke * invoke)1918 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1919 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1920 }
1921
VisitMathTan(HInvoke * invoke)1922 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1923 CreateFPToFPCallLocations(allocator_, invoke);
1924 }
1925
VisitMathTan(HInvoke * invoke)1926 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1927 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1928 }
1929
VisitMathTanh(HInvoke * invoke)1930 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1931 CreateFPToFPCallLocations(allocator_, invoke);
1932 }
1933
VisitMathTanh(HInvoke * invoke)1934 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1935 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1936 }
1937
VisitMathAtan2(HInvoke * invoke)1938 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1939 CreateFPFPToFPCallLocations(allocator_, invoke);
1940 }
1941
VisitMathAtan2(HInvoke * invoke)1942 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1943 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1944 }
1945
VisitMathPow(HInvoke * invoke)1946 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1947 CreateFPFPToFPCallLocations(allocator_, invoke);
1948 }
1949
VisitMathPow(HInvoke * invoke)1950 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1951 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1952 }
1953
VisitMathHypot(HInvoke * invoke)1954 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1955 CreateFPFPToFPCallLocations(allocator_, invoke);
1956 }
1957
VisitMathHypot(HInvoke * invoke)1958 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1959 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1960 }
1961
VisitMathNextAfter(HInvoke * invoke)1962 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1963 CreateFPFPToFPCallLocations(allocator_, invoke);
1964 }
1965
VisitMathNextAfter(HInvoke * invoke)1966 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1967 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1968 }
1969
VisitIntegerReverse(HInvoke * invoke)1970 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1971 CreateIntToIntLocations(allocator_, invoke);
1972 }
1973
VisitIntegerReverse(HInvoke * invoke)1974 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1975 ArmVIXLAssembler* assembler = GetAssembler();
1976 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1977 }
1978
VisitLongReverse(HInvoke * invoke)1979 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1980 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1981 }
1982
VisitLongReverse(HInvoke * invoke)1983 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1984 ArmVIXLAssembler* assembler = GetAssembler();
1985 LocationSummary* locations = invoke->GetLocations();
1986
1987 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
1988 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
1989 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1990 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1991
1992 __ Rbit(out_reg_lo, in_reg_hi);
1993 __ Rbit(out_reg_hi, in_reg_lo);
1994 }
1995
GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler * assembler,Location pair)1996 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1997 DCHECK(pair.IsRegisterPair());
1998 __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1999 __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
2000 }
2001
GenerateReverseBytes(ArmVIXLAssembler * assembler,DataType::Type type,Location in,Location out)2002 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
2003 DataType::Type type,
2004 Location in,
2005 Location out) {
2006 switch (type) {
2007 case DataType::Type::kUint16:
2008 __ Rev16(RegisterFrom(out), RegisterFrom(in));
2009 break;
2010 case DataType::Type::kInt16:
2011 __ Revsh(RegisterFrom(out), RegisterFrom(in));
2012 break;
2013 case DataType::Type::kInt32:
2014 __ Rev(RegisterFrom(out), RegisterFrom(in));
2015 break;
2016 case DataType::Type::kInt64:
2017 DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
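      // The overlap restriction checked above guarantees that writing the low output word
      // does not clobber the low input word, which is still needed for the high output word.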
2018 __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
2019 __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
2020 break;
2021 case DataType::Type::kFloat32:
2022 __ Rev(RegisterFrom(in), RegisterFrom(in)); // Note: Clobbers `in`.
2023 __ Vmov(SRegisterFrom(out), RegisterFrom(in));
2024 break;
2025 case DataType::Type::kFloat64:
2026 GenerateReverseBytesInPlaceForEachWord(assembler, in); // Note: Clobbers `in`.
2027 __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in)); // Swap high/low.
2028 break;
2029 default:
2030 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
2031 UNREACHABLE();
2032 }
2033 }
2034
VisitIntegerReverseBytes(HInvoke * invoke)2035 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2036 CreateIntToIntLocations(allocator_, invoke);
2037 }
2038
VisitIntegerReverseBytes(HInvoke * invoke)2039 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2040 ArmVIXLAssembler* assembler = GetAssembler();
2041 LocationSummary* locations = invoke->GetLocations();
2042 GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
2043 }
2044
VisitLongReverseBytes(HInvoke * invoke)2045 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2046 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2047 }
2048
VisitLongReverseBytes(HInvoke * invoke)2049 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2050 ArmVIXLAssembler* assembler = GetAssembler();
2051 LocationSummary* locations = invoke->GetLocations();
2052 GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
2053 }
2054
VisitShortReverseBytes(HInvoke * invoke)2055 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2056 CreateIntToIntLocations(allocator_, invoke);
2057 }
2058
VisitShortReverseBytes(HInvoke * invoke)2059 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2060 ArmVIXLAssembler* assembler = GetAssembler();
2061 LocationSummary* locations = invoke->GetLocations();
2062 GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
2063 }
2064
GenBitCount(HInvoke * instr,DataType::Type type,ArmVIXLAssembler * assembler)2065 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2066 DCHECK(DataType::IsIntOrLongType(type)) << type;
2067 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2068 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2069
2070 bool is_long = type == DataType::Type::kInt64;
2071 LocationSummary* locations = instr->GetLocations();
2072 Location in = locations->InAt(0);
2073 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2074 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2075 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2076 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2077 vixl32::Register out_r = OutputRegister(instr);
2078
2079 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2080 // According to the Cortex-A57 and A72 optimization guides, transferring data from a core reg
2081 // to the upper or lower half of a VFP D-reg incurs extra latency compared to the full D-reg.
2082 // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
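// In effect this computes Integer.bitCount() / Long.bitCount(): VCNT produces a per-byte
// population count and the VPADDL chain pairwise-sums those bytes into a single count,
// which is then moved back to a core register.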
2083 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2084 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2085 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2086 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2087 if (is_long) {
2088 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2089 }
2090 __ Vmov(out_r, tmp_s);
2091 }
2092
VisitIntegerBitCount(HInvoke * invoke)2093 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2094 CreateIntToIntLocations(allocator_, invoke);
2095 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2096 }
2097
VisitIntegerBitCount(HInvoke * invoke)2098 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2099 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2100 }
2101
VisitLongBitCount(HInvoke * invoke)2102 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2103 VisitIntegerBitCount(invoke);
2104 }
2105
VisitLongBitCount(HInvoke * invoke)2106 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2107 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2108 }
2109
GenHighestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)2110 static void GenHighestOneBit(HInvoke* invoke,
2111 DataType::Type type,
2112 CodeGeneratorARMVIXL* codegen) {
2113 DCHECK(DataType::IsIntOrLongType(type));
2114
2115 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2116 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2117 const vixl32::Register temp = temps.Acquire();
2118
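// Highest one bit is computed as (0x80000000 >> clz(x)), i.e. 1 << (31 - clz(x)), which is
// 0 for x == 0 because a register-specified shift by 32 yields 0. A rough sketch of the
// 64-bit case, in pseudocode rather than generated code:
//
//   hi = 0x80000000u >> clz(in_hi);
//   lo = 0x80000000u >> clz(in_lo);
//   if (hi != 0) lo = 0;  // Any set bit in the high word wins.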
2119 if (type == DataType::Type::kInt64) {
2120 LocationSummary* locations = invoke->GetLocations();
2121 Location in = locations->InAt(0);
2122 Location out = locations->Out();
2123
2124 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2125 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2126 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2127 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2128
2129 __ Mov(temp, 0x80000000); // Modified immediate.
2130 __ Clz(out_reg_lo, in_reg_lo);
2131 __ Clz(out_reg_hi, in_reg_hi);
2132 __ Lsr(out_reg_lo, temp, out_reg_lo);
2133 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2134
2135 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2136 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2137 // we check that the output is in a low register, so that a 16-bit MOV
2138 // encoding can be used. If output is in a high register, then we generate
2139 // 4 more bytes of code to avoid a branch.
2140 Operand mov_src(0);
2141 if (!out_reg_lo.IsLow()) {
2142 __ Mov(LeaveFlags, temp, 0);
2143 mov_src = Operand(temp);
2144 }
2145 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2146 2 * vixl32::k16BitT32InstructionSizeInBytes,
2147 CodeBufferCheckScope::kExactSize);
2148 __ it(ne);
2149 __ mov(ne, out_reg_lo, mov_src);
2150 } else {
2151 vixl32::Register out = OutputRegister(invoke);
2152 vixl32::Register in = InputRegisterAt(invoke, 0);
2153
2154 __ Mov(temp, 0x80000000); // Modified immediate.
2155 __ Clz(out, in);
2156 __ Lsr(out, temp, out);
2157 }
2158 }
2159
VisitIntegerHighestOneBit(HInvoke * invoke)2160 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2161 CreateIntToIntLocations(allocator_, invoke);
2162 }
2163
VisitIntegerHighestOneBit(HInvoke * invoke)2164 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2165 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2166 }
2167
VisitLongHighestOneBit(HInvoke * invoke)2168 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2169 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2170 }
2171
VisitLongHighestOneBit(HInvoke * invoke)2172 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2173 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2174 }
2175
GenLowestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)2176 static void GenLowestOneBit(HInvoke* invoke,
2177 DataType::Type type,
2178 CodeGeneratorARMVIXL* codegen) {
2179 DCHECK(DataType::IsIntOrLongType(type));
2180
2181 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2182 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2183 const vixl32::Register temp = temps.Acquire();
2184
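// Lowest one bit is computed as (x & -x). A rough sketch of the 64-bit case, in pseudocode
// rather than generated code:
//
//   lo = in_lo & -in_lo;
//   hi = in_hi & -in_hi;
//   if (in_lo != 0) hi = 0;  // Any set bit in the low word wins.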
2185 if (type == DataType::Type::kInt64) {
2186 LocationSummary* locations = invoke->GetLocations();
2187 Location in = locations->InAt(0);
2188 Location out = locations->Out();
2189
2190 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2191 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2192 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2193 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2194
2195 __ Rsb(out_reg_hi, in_reg_hi, 0);
2196 __ Rsb(out_reg_lo, in_reg_lo, 0);
2197 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2198 // The result of this operation is 0 iff in_reg_lo is 0.
2199 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2200
2201 // Discard result for highest 32 bits if lowest 32 bits are not zero.
2202 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2203 // we check that the output is in a low register, so that a 16-bit MOV
2204 // encoding can be used. If output is in a high register, then we generate
2205 // 4 more bytes of code to avoid a branch.
2206 Operand mov_src(0);
2207 if (!out_reg_lo.IsLow()) {
2208 __ Mov(LeaveFlags, temp, 0);
2209 mov_src = Operand(temp);
2210 }
2211 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2212 2 * vixl32::k16BitT32InstructionSizeInBytes,
2213 CodeBufferCheckScope::kExactSize);
2214 __ it(ne);
2215 __ mov(ne, out_reg_hi, mov_src);
2216 } else {
2217 vixl32::Register out = OutputRegister(invoke);
2218 vixl32::Register in = InputRegisterAt(invoke, 0);
2219
2220 __ Rsb(temp, in, 0);
2221 __ And(out, temp, in);
2222 }
2223 }
2224
VisitIntegerLowestOneBit(HInvoke * invoke)2225 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2226 CreateIntToIntLocations(allocator_, invoke);
2227 }
2228
VisitIntegerLowestOneBit(HInvoke * invoke)2229 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2230 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2231 }
2232
VisitLongLowestOneBit(HInvoke * invoke)2233 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2234 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2235 }
2236
VisitLongLowestOneBit(HInvoke * invoke)2237 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2238 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2239 }
2240
VisitStringGetCharsNoCheck(HInvoke * invoke)2241 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2242 LocationSummary* locations =
2243 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2244 locations->SetInAt(0, Location::RequiresRegister());
2245 locations->SetInAt(1, Location::RequiresRegister());
2246 locations->SetInAt(2, Location::RequiresRegister());
2247 locations->SetInAt(3, Location::RequiresRegister());
2248 locations->SetInAt(4, Location::RequiresRegister());
2249
2250 // Temporary registers to store lengths of strings and for calculations.
2251 locations->AddTemp(Location::RequiresRegister());
2252 locations->AddTemp(Location::RequiresRegister());
2253 locations->AddTemp(Location::RequiresRegister());
2254 }
2255
VisitStringGetCharsNoCheck(HInvoke * invoke)2256 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2257 ArmVIXLAssembler* assembler = GetAssembler();
2258 LocationSummary* locations = invoke->GetLocations();
2259
2260 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2261 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2262 DCHECK_EQ(char_size, 2u);
2263
2264 // Location of data in char array buffer.
2265 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2266
2267 // Location of char array data in string.
2268 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2269
2270 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2271 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
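// Overall this is, roughly and in pseudocode rather than generated code:
//
//   memcpy(dst + data_offset + dstBegin * 2,
//          src + value_offset + srcBegin * 2,
//          (srcEnd - srcBegin) * 2);
//
// with a widening byte-to-char copy instead when the source String is compressed.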
2272 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2273 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2274 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2275 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2276 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2277
2278 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2279 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2280 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2281
2282 vixl32::Label done, compressed_string_loop;
2283 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2284 // dst to be copied.
2285 __ Add(dst_ptr, dstObj, data_offset);
2286 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2287
2288 __ Subs(num_chr, srcEnd, srcBegin);
2289 // Early out for valid zero-length retrievals.
2290 __ B(eq, final_label, /* is_far_target= */ false);
2291
2292 // src range to copy.
2293 __ Add(src_ptr, srcObj, value_offset);
2294
2295 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2296 vixl32::Register temp;
2297 vixl32::Label compressed_string_preloop;
2298 if (mirror::kUseStringCompression) {
2299 // Location of count in string.
2300 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2301 temp = temps.Acquire();
2302 // String's length.
2303 __ Ldr(temp, MemOperand(srcObj, count_offset));
2304 __ Tst(temp, 1);
2305 temps.Release(temp);
2306 __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2307 }
2308 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2309
2310 // Do the copy.
2311 vixl32::Label loop, remainder;
2312
2313 temp = temps.Acquire();
2314 // Subtract into a temp so that num_chr does not need repairing on the < 4 character path.
2315 __ Subs(temp, num_chr, 4);
2316 __ B(lt, &remainder, /* is_far_target= */ false);
2317
2318 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2319 __ Mov(num_chr, temp);
2320
2321 // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
2322 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2323 // to rectify these everywhere this intrinsic applies.)
2324 __ Bind(&loop);
2325 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2326 __ Subs(num_chr, num_chr, 4);
2327 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2328 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2329 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2330 temps.Release(temp);
2331 __ B(ge, &loop, /* is_far_target= */ false);
2332
2333 __ Adds(num_chr, num_chr, 4);
2334 __ B(eq, final_label, /* is_far_target= */ false);
2335
2336 // Main loop for < 4 character case and remainder handling. Loads and stores one
2337 // 16-bit Java character at a time.
2338 __ Bind(&remainder);
2339 temp = temps.Acquire();
2340 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2341 __ Subs(num_chr, num_chr, 1);
2342 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2343 temps.Release(temp);
2344 __ B(gt, &remainder, /* is_far_target= */ false);
2345
2346 if (mirror::kUseStringCompression) {
2347 __ B(final_label);
2348
2349 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2350 DCHECK_EQ(c_char_size, 1u);
2351 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2352 __ Bind(&compressed_string_preloop);
2353 __ Add(src_ptr, src_ptr, srcBegin);
2354 __ Bind(&compressed_string_loop);
2355 temp = temps.Acquire();
2356 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2357 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2358 temps.Release(temp);
2359 __ Subs(num_chr, num_chr, 1);
2360 __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2361 }
2362
2363 if (done.IsReferenced()) {
2364 __ Bind(&done);
2365 }
2366 }
2367
VisitFloatIsInfinite(HInvoke * invoke)2368 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2369 CreateFPToIntLocations(allocator_, invoke);
2370 }
2371
VisitFloatIsInfinite(HInvoke * invoke)2372 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2373 ArmVIXLAssembler* const assembler = GetAssembler();
2374 const vixl32::Register out = OutputRegister(invoke);
2375 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2376 // we don't care about the sign bit anyway.
2377 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2378
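// The check below is, in pseudocode rather than generated code:
//
//   bits = raw bits of the float input;
//   is_infinite = ((bits << 1) == (0x7f800000u << 1));  // The shift discards the sign bit.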
2379 __ Vmov(out, InputSRegisterAt(invoke, 0));
2380 // We don't care about the sign bit, so shift left.
2381 __ Lsl(out, out, 1);
2382 __ Eor(out, out, infinity);
2383 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2384 }
2385
VisitDoubleIsInfinite(HInvoke * invoke)2386 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2387 CreateFPToIntLocations(allocator_, invoke);
2388 }
2389
VisitDoubleIsInfinite(HInvoke * invoke)2390 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2391 ArmVIXLAssembler* const assembler = GetAssembler();
2392 const vixl32::Register out = OutputRegister(invoke);
2393 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2394 const vixl32::Register temp = temps.Acquire();
2395 // The highest 32 bits of double precision positive infinity separated into
2396 // two constants encodable as immediate operands.
2397 constexpr uint32_t infinity_high = 0x7f000000U;
2398 constexpr uint32_t infinity_high2 = 0x00f00000U;
2399
2400 static_assert((infinity_high | infinity_high2) ==
2401 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2402 "The constants do not add up to the high 32 bits of double "
2403 "precision positive infinity.");
2404 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2405 __ Eor(out, out, infinity_high);
2406 __ Eor(out, out, infinity_high2);
2407 // We don't care about the sign bit, so shift left.
2408 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2409 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2410 }
2411
VisitMathCeil(HInvoke * invoke)2412 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2413 if (features_.HasARMv8AInstructions()) {
2414 CreateFPToFPLocations(allocator_, invoke);
2415 }
2416 }
2417
VisitMathCeil(HInvoke * invoke)2418 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2419 ArmVIXLAssembler* assembler = GetAssembler();
2420 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2421 __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2422 }
2423
VisitMathFloor(HInvoke * invoke)2424 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2425 if (features_.HasARMv8AInstructions()) {
2426 CreateFPToFPLocations(allocator_, invoke);
2427 }
2428 }
2429
VisitMathFloor(HInvoke * invoke)2430 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2431 ArmVIXLAssembler* assembler = GetAssembler();
2432 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2433 __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2434 }
2435
VisitIntegerValueOf(HInvoke * invoke)2436 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2437 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2438 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2439 invoke,
2440 codegen_,
2441 LocationFrom(r0),
2442 LocationFrom(calling_convention.GetRegisterAt(0)));
2443 }
2444
VisitIntegerValueOf(HInvoke * invoke)2445 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2446 IntrinsicVisitor::IntegerValueOfInfo info =
2447 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2448 LocationSummary* locations = invoke->GetLocations();
2449 ArmVIXLAssembler* const assembler = GetAssembler();
2450
2451 vixl32::Register out = RegisterFrom(locations->Out());
2452 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2453 vixl32::Register temp = temps.Acquire();
2454 auto allocate_instance = [&]() {
2455 DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2456 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2457 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2458 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2459 };
2460 if (invoke->InputAt(0)->IsConstant()) {
2461 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2462 if (static_cast<uint32_t>(value - info.low) < info.length) {
2463 // Just embed the j.l.Integer in the code.
2464 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2465 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2466 } else {
2467 DCHECK(locations->CanCall());
2468 // Allocate and initialize a new j.l.Integer.
2469 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2470 // JIT object table.
2471 allocate_instance();
2472 __ Mov(temp, value);
2473 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
2474 // Class pointer and `value` final field stores require a barrier before publication.
2475 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2476 }
2477 } else {
2478 DCHECK(locations->CanCall());
2479 vixl32::Register in = RegisterFrom(locations->InAt(0));
2480 // Check bounds of our cache.
2481 __ Add(out, in, -info.low);
2482 __ Cmp(out, info.length);
2483 vixl32::Label allocate, done;
2484 __ B(hs, &allocate, /* is_far_target= */ false);
2485 // If the value is within the bounds, load the j.l.Integer directly from the array.
2486 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2487 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2488 assembler->MaybeUnpoisonHeapReference(out);
2489 __ B(&done);
2490 __ Bind(&allocate);
2491 // Otherwise allocate and initialize a new j.l.Integer.
2492 allocate_instance();
2493 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
2494 // Class pointer and `value` final field stores require a barrier before publication.
2495 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2496 __ Bind(&done);
2497 }
2498 }
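
// Rough Java-level shape of what VisitIntegerValueOf() compiles (illustrative only;
// names follow java.lang.Integer$IntegerCache):
//   if (value >= low && value - low < cache.length) {
//     return cache[value - low];      // boot-image array load on the fast path above
//   } else {
//     return new Integer(value);      // allocate_instance() + `value` field store
//   }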
2499
2500 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2501 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2502 }
2503
2504 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2505 ArmVIXLAssembler* assembler = GetAssembler();
2506 LocationSummary* locations = invoke->GetLocations();
2507
2508 Location obj = locations->InAt(0);
2509 Location out = locations->Out();
2510
2511 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2512 codegen_->AddSlowPath(slow_path);
2513
2514 if (kEmitCompilerReadBarrier) {
2515 // Check self->GetWeakRefAccessEnabled().
2516 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2517 vixl32::Register temp = temps.Acquire();
2518 __ Ldr(temp,
2519 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2520 __ Cmp(temp, 0);
2521 __ B(eq, slow_path->GetEntryLabel());
2522 }
2523
2524 {
2525 // Load the java.lang.ref.Reference class.
2526 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2527 vixl32::Register temp = temps.Acquire();
2528 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2529
2530 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2531 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2532 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2533 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2534 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2535 __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2536 __ Cmp(temp, 0);
2537 __ B(ne, slow_path->GetEntryLabel());
2538 }
2539
2540 // Load the value from the field.
2541 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2542 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2543 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2544 out,
2545 RegisterFrom(obj),
2546 referent_offset,
2547 /*maybe_temp=*/ Location::NoLocation(),
2548 /*needs_null_check=*/ true);
2549 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2550 } else {
2551 {
2552 vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2553 __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2554 codegen_->MaybeRecordImplicitNullCheck(invoke);
2555 }
2556 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2557 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2558 }
2559 __ Bind(slow_path->GetExitLabel());
2560 }
2561
2562 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2563 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
2564 }
2565
2566 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2567 LocationSummary* locations = invoke->GetLocations();
2568 ArmVIXLAssembler* assembler = GetAssembler();
2569 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2570
2571 vixl32::Register obj = RegisterFrom(locations->InAt(0));
2572 vixl32::Register other = RegisterFrom(locations->InAt(1));
2573 vixl32::Register out = RegisterFrom(locations->Out());
2574 vixl32::Register tmp = temps.Acquire();
2575
2576 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2577 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2578
2579 {
2580 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2581 // Loading scratch register always uses 32-bit encoding.
2582 vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2583 vixl32::k32BitT32InstructionSizeInBytes);
2584 __ ldr(tmp, MemOperand(obj, referent_offset));
2585 codegen_->MaybeRecordImplicitNullCheck(invoke);
2586 }
2587 assembler->MaybeUnpoisonHeapReference(tmp);
2588 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2589
2590 if (kEmitCompilerReadBarrier) {
2591 DCHECK(kUseBakerReadBarrier);
2592
2593 vixl32::Label calculate_result;
2594 __ Subs(out, tmp, other);
2595 __ B(eq, &calculate_result); // `out` is 0 if taken.
2596
2597 // Check if the loaded reference is null.
2598 __ Cmp(tmp, 0);
2599 __ B(eq, &calculate_result); // `out` is not 0 if taken.
2600
2601 // For correct memory visibility, we need a barrier before loading the lock word,
2602 // but the barrier already emitted above for the volatile load is sufficient.
2603
2604 // Load the lockword and check if it is a forwarding address.
2605 static_assert(LockWord::kStateShift == 30u);
2606 static_assert(LockWord::kStateForwardingAddress == 3u);
2607 __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2608 __ Cmp(tmp, Operand(0xc0000000));
2609 __ B(lo, &calculate_result); // `out` is not 0 if taken.
2610
2611 // Extract the forwarding address and subtract from `other`.
2612 __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2613
2614 __ Bind(&calculate_result);
2615 } else {
2616 DCHECK(!kEmitCompilerReadBarrier);
2617 __ Sub(out, tmp, other);
2618 }
2619
2620 // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
2621 __ Clz(out, out);
2622 __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
2623 }
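
// Illustrative note: the CLZ/LSR pair computes `out = (out == 0)` without a branch.
// On AArch32, clz(0) == 32 and 32 >> 5 == 1, while clz(x) <= 31 for any x != 0,
// so clz(x) >> 5 == 0. WhichPowerOf2(out.GetSizeInBits()) is that shift amount, 5.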
2624
2625 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2626 LocationSummary* locations =
2627 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2628 locations->SetOut(Location::RequiresRegister());
2629 }
2630
2631 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2632 ArmVIXLAssembler* assembler = GetAssembler();
2633 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2634 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2635 __ Ldr(out, MemOperand(tr, offset));
2636 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2637 vixl32::Register temp = temps.Acquire();
2638 vixl32::Label done;
2639 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2640 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
2641 __ Dmb(vixl32::ISH);
2642 __ Mov(temp, 0);
2643 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2644 __ Dmb(vixl32::ISH);
2645 if (done.IsReferenced()) {
2646 __ Bind(&done);
2647 }
2648 }
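
// Illustrative summary of the sequence emitted above for Thread.interrupted()
// (read-and-clear of the per-thread flag; mnemonics approximate):
//   ldr   out, [tr, #interrupted_offset]
//   cbz   out, done                        ; flag clear: result is 0
//   dmb   ish
//   mov   tmp, #0
//   str   tmp, [tr, #interrupted_offset]   ; clear the flag
//   dmb   ish
// done: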
2649
2650 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2651 LocationSummary* locations =
2652 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2653 locations->SetInAt(0, Location::Any());
2654 }
2655
2656 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2657
2658 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2659 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2660 }
2661
2662 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2663 ArmVIXLAssembler* assembler = GetAssembler();
2664 LocationSummary* locations = invoke->GetLocations();
2665 vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2666 vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2667 vixl32::Register out = RegisterFrom(locations->Out());
2668
2669 // Check if divisor is zero, bail to managed implementation to handle.
2670 SlowPathCodeARMVIXL* slow_path =
2671 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2672 codegen_->AddSlowPath(slow_path);
2673 __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2674
2675 __ Udiv(out, dividend, divisor);
2676
2677 __ Bind(slow_path->GetExitLabel());
2678 }
2679
2680 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2681 return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2682 }
2683
2684 static void GenerateIntrinsicGet(HInvoke* invoke,
2685 CodeGeneratorARMVIXL* codegen,
2686 DataType::Type type,
2687 std::memory_order order,
2688 bool atomic,
2689 vixl32::Register base,
2690 vixl32::Register offset,
2691 Location out,
2692 Location maybe_temp,
2693 Location maybe_temp2,
2694 Location maybe_temp3) {
2695 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2696 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2697 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2698 DCHECK(atomic || order == std::memory_order_relaxed);
2699
2700 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2701 MemOperand address(base, offset);
2702 switch (type) {
2703 case DataType::Type::kBool:
2704 __ Ldrb(RegisterFrom(out), address);
2705 break;
2706 case DataType::Type::kInt8:
2707 __ Ldrsb(RegisterFrom(out), address);
2708 break;
2709 case DataType::Type::kUint16:
2710 __ Ldrh(RegisterFrom(out), address);
2711 break;
2712 case DataType::Type::kInt16:
2713 __ Ldrsh(RegisterFrom(out), address);
2714 break;
2715 case DataType::Type::kInt32:
2716 __ Ldr(RegisterFrom(out), address);
2717 break;
2718 case DataType::Type::kInt64:
2719 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2720 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2721 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2722 const vixl32::Register temp_reg = temps.Acquire();
2723 __ Add(temp_reg, base, offset);
2724 vixl32::Label loop;
2725 __ Bind(&loop);
2726 __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2727 __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2728 __ Cmp(strexd_tmp, 0);
2729 __ B(ne, &loop);
2730 } else {
2731 __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2732 }
2733 break;
2734 case DataType::Type::kReference:
2735 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2736 // Piggy-back on the field load path using introspection for the Baker read barrier.
2737 vixl32::Register temp = RegisterFrom(maybe_temp);
2738 __ Add(temp, base, offset);
2739 codegen->GenerateFieldLoadWithBakerReadBarrier(
2740 invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2741 } else {
2742 __ Ldr(RegisterFrom(out), address);
2743 }
2744 break;
2745 case DataType::Type::kFloat32: {
2746 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2747 const vixl32::Register temp_reg = temps.Acquire();
2748 __ Add(temp_reg, base, offset);
2749 __ Vldr(SRegisterFrom(out), MemOperand(temp_reg));
2750 break;
2751 }
2752 case DataType::Type::kFloat64: {
2753 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2754 const vixl32::Register temp_reg = temps.Acquire();
2755 __ Add(temp_reg, base, offset);
2756 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2757 vixl32::Register lo = RegisterFrom(maybe_temp);
2758 vixl32::Register hi = RegisterFrom(maybe_temp2);
2759 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2760 vixl32::Label loop;
2761 __ Bind(&loop);
2762 __ Ldrexd(lo, hi, MemOperand(temp_reg));
2763 __ Strexd(strexd_tmp, lo, hi, MemOperand(temp_reg));
2764 __ Cmp(strexd_tmp, 0);
2765 __ B(ne, &loop);
2766 __ Vmov(DRegisterFrom(out), lo, hi);
2767 } else {
2768 __ Vldr(DRegisterFrom(out), MemOperand(temp_reg));
2769 }
2770 break;
2771 }
2772 default:
2773 LOG(FATAL) << "Unexpected type " << type;
2774 UNREACHABLE();
2775 }
2776 if (acquire_barrier) {
2777 codegen->GenerateMemoryBarrier(
2778 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2779 }
2780 if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) {
2781 Location base_loc = LocationFrom(base);
2782 Location index_loc = LocationFrom(offset);
2783 codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2784 }
2785 }
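
// Illustrative examples of what GenerateIntrinsicGet() emits (no read barrier,
// barrier encodings chosen by GenerateMemoryBarrier()):
//   kInt32, memory_order_acquire:   ldr out, [base, offset]; dmb(kLoadAny)
//   kInt64, atomic, no atomic LDRD: add tmp, base, offset
//                                   retry: ldrexd lo, hi, [tmp]
//                                          strexd t, lo, hi, [tmp]
//                                          cmp t, #0; bne retry
// The LDREXD/STREXD loop only exists to make the 64-bit load single-copy atomic
// on cores where plain LDRD is not (see Use64BitExclusiveLoadStore()).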
2786
2787 static void CreateUnsafeGetLocations(HInvoke* invoke,
2788 CodeGeneratorARMVIXL* codegen,
2789 DataType::Type type,
2790 bool atomic) {
2791 bool can_call = kEmitCompilerReadBarrier &&
2792 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2793 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2794 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2795 LocationSummary* locations =
2796 new (allocator) LocationSummary(invoke,
2797 can_call
2798 ? LocationSummary::kCallOnSlowPath
2799 : LocationSummary::kNoCall,
2800 kIntrinsified);
2801 if (can_call && kUseBakerReadBarrier) {
2802 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2803 }
2804 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2805 locations->SetInAt(1, Location::RequiresRegister());
2806 locations->SetInAt(2, Location::RequiresRegister());
2807 locations->SetOut(Location::RequiresRegister(),
2808 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2809 if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2810 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2811 // We need a temporary register for the read barrier marking slow
2812 // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2813 // or the STREXD result for the LDREXD/STREXD sequence when LDRD is non-atomic.
2814 locations->AddTemp(Location::RequiresRegister());
2815 }
2816 }
2817
2818 static void GenUnsafeGet(HInvoke* invoke,
2819 CodeGeneratorARMVIXL* codegen,
2820 DataType::Type type,
2821 std::memory_order order,
2822 bool atomic) {
2823 LocationSummary* locations = invoke->GetLocations();
2824 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
2825 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
2826 Location out = locations->Out();
2827 Location maybe_temp = Location::NoLocation();
2828 if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2829 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2830 maybe_temp = locations->GetTemp(0);
2831 }
2832 GenerateIntrinsicGet(invoke,
2833 codegen,
2834 type,
2835 order,
2836 atomic,
2837 base,
2838 offset,
2839 out,
2840 maybe_temp,
2841 /*maybe_temp2=*/ Location::NoLocation(),
2842 /*maybe_temp3=*/ Location::NoLocation());
2843 }
2844
2845 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2846 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2847 }
2848
2849 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2850 GenUnsafeGet(
2851 invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2852 }
2853
2854 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2855 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2856 }
2857
2858 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2859 GenUnsafeGet(
2860 invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2861 }
2862
2863 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2864 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2865 }
2866
2867 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2868 GenUnsafeGet(
2869 invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2870 }
2871
2872 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2873 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2874 }
2875
2876 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2877 GenUnsafeGet(
2878 invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2879 }
2880
2881 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2882 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2883 }
2884
2885 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2886 GenUnsafeGet(
2887 invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2888 }
2889
2890 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2891 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2892 }
2893
2894 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2895 GenUnsafeGet(
2896 invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2897 }
2898
2899 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
2900 DataType::Type type,
2901 std::memory_order order,
2902 bool atomic,
2903 vixl32::Register base,
2904 vixl32::Register offset,
2905 Location value,
2906 Location maybe_temp,
2907 Location maybe_temp2,
2908 Location maybe_temp3) {
2909 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2910 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
2911 DCHECK(release_barrier || order == std::memory_order_relaxed);
2912 DCHECK(atomic || order == std::memory_order_relaxed);
2913
2914 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2915 if (release_barrier) {
2916 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
2917 }
2918 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2919 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2920 vixl32::Register temp = temps.Acquire();
2921 __ Mov(temp, RegisterFrom(value));
2922 assembler->PoisonHeapReference(temp);
2923 value = LocationFrom(temp);
2924 }
2925 MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
2926 if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
2927 const vixl32::Register temp_reg = temps.Acquire();
2928 __ Add(temp_reg, base, offset);
2929 address = MemOperand(temp_reg);
2930 }
2931 switch (type) {
2932 case DataType::Type::kBool:
2933 case DataType::Type::kInt8:
2934 __ Strb(RegisterFrom(value), address);
2935 break;
2936 case DataType::Type::kUint16:
2937 case DataType::Type::kInt16:
2938 __ Strh(RegisterFrom(value), address);
2939 break;
2940 case DataType::Type::kReference:
2941 case DataType::Type::kInt32:
2942 __ Str(RegisterFrom(value), address);
2943 break;
2944 case DataType::Type::kInt64:
2945 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2946 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2947 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2948 vixl32::Label loop;
2949 __ Bind(&loop);
2950 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2951 __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2952 __ Cmp(lo_tmp, 0);
2953 __ B(ne, &loop);
2954 } else {
2955 __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2956 }
2957 break;
2958 case DataType::Type::kFloat32:
2959 __ Vstr(SRegisterFrom(value), address);
2960 break;
2961 case DataType::Type::kFloat64:
2962 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2963 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2964 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2965 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2966 vixl32::Label loop;
2967 __ Bind(&loop);
2968 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2969 __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2970 __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2971 __ Cmp(strexd_tmp, 0);
2972 __ B(ne, &loop);
2973 } else {
2974 __ Vstr(DRegisterFrom(value), address);
2975 }
2976 break;
2977 default:
2978 LOG(FATAL) << "Unexpected type " << type;
2979 UNREACHABLE();
2980 }
2981 if (seq_cst_barrier) {
2982 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2983 }
2984 }
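
// Illustrative examples for a plain kInt32 store (barrier encodings chosen by
// GenerateMemoryBarrier()):
//   memory_order_release: dmb(kAnyStore); str value, [base, offset]
//   memory_order_seq_cst: dmb(kAnyStore); str value, [base, offset]; dmb(kAnyAny)
// As on the load side, 64-bit and double stores use an LDREXD/STREXD loop when
// LDRD/STRD is not single-copy atomic.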
2985
2986 static void CreateUnsafePutLocations(HInvoke* invoke,
2987 CodeGeneratorARMVIXL* codegen,
2988 DataType::Type type,
2989 bool atomic) {
2990 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2991 LocationSummary* locations =
2992 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2993 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2994 locations->SetInAt(1, Location::RequiresRegister());
2995 locations->SetInAt(2, Location::RequiresRegister());
2996 locations->SetInAt(3, Location::RequiresRegister());
2997
2998 if (type == DataType::Type::kInt64) {
2999 // Potentially need temps for ldrexd-strexd loop.
3000 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3001 locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
3002 locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
3003 }
3004 } else if (type == DataType::Type::kReference) {
3005 // Temp for card-marking.
3006 locations->AddTemp(Location::RequiresRegister()); // Temp.
3007 }
3008 }
3009
3010 static void GenUnsafePut(HInvoke* invoke,
3011 DataType::Type type,
3012 std::memory_order order,
3013 bool atomic,
3014 CodeGeneratorARMVIXL* codegen) {
3015 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3016
3017 LocationSummary* locations = invoke->GetLocations();
3018 vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer.
3019 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
3020 Location value = locations->InAt(3);
3021 Location maybe_temp = Location::NoLocation();
3022 Location maybe_temp2 = Location::NoLocation();
3023 if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3024 maybe_temp = locations->GetTemp(0);
3025 maybe_temp2 = locations->GetTemp(1);
3026 }
3027
3028 GenerateIntrinsicSet(codegen,
3029 type,
3030 order,
3031 atomic,
3032 base,
3033 offset,
3034 value,
3035 maybe_temp,
3036 maybe_temp2,
3037 /*maybe_temp3=*/ Location::NoLocation());
3038
3039 if (type == DataType::Type::kReference) {
3040 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3041 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3042 vixl32::Register card = temps.Acquire();
3043 bool value_can_be_null = true; // TODO: Worth finding out this information?
3044 codegen->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3045 }
3046 }
3047
3048 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3049 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3050 }
3051
3052 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3053 GenUnsafePut(invoke,
3054 DataType::Type::kInt32,
3055 std::memory_order_relaxed,
3056 /*atomic=*/ false,
3057 codegen_);
3058 }
3059
3060 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3061 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3062 }
3063
3064 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3065 GenUnsafePut(invoke,
3066 DataType::Type::kInt32,
3067 std::memory_order_release,
3068 /*atomic=*/ true,
3069 codegen_);
3070 }
3071
3072 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3073 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3074 }
3075
3076 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3077 GenUnsafePut(invoke,
3078 DataType::Type::kInt32,
3079 std::memory_order_seq_cst,
3080 /*atomic=*/ true,
3081 codegen_);
3082 }
3083 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3084 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3085 }
3086
3087 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3088 GenUnsafePut(invoke,
3089 DataType::Type::kReference,
3090 std::memory_order_relaxed,
3091 /*atomic=*/ false,
3092 codegen_);
3093 }
3094
3095 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3096 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3097 }
3098
3099 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3100 GenUnsafePut(invoke,
3101 DataType::Type::kReference,
3102 std::memory_order_release,
3103 /*atomic=*/ true,
3104 codegen_);
3105 }
3106
3107 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3108 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3109 }
3110
3111 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3112 GenUnsafePut(invoke,
3113 DataType::Type::kReference,
3114 std::memory_order_seq_cst,
3115 /*atomic=*/ true,
3116 codegen_);
3117 }
3118
3119 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3120 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3121 }
3122
3123 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3124 GenUnsafePut(invoke,
3125 DataType::Type::kInt64,
3126 std::memory_order_relaxed,
3127 /*atomic=*/ false,
3128 codegen_);
3129 }
3130
3131 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3132 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3133 }
3134
3135 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3136 GenUnsafePut(invoke,
3137 DataType::Type::kInt64,
3138 std::memory_order_release,
3139 /*atomic=*/ true,
3140 codegen_);
3141 }
3142
3143 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3144 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3145 }
3146
3147 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3148 GenUnsafePut(invoke,
3149 DataType::Type::kInt64,
3150 std::memory_order_seq_cst,
3151 /*atomic=*/ true,
3152 codegen_);
3153 }
3154
3155 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3156 DataType::Type type,
3157 vixl32::Register ptr,
3158 Location old_value) {
3159 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3160 switch (type) {
3161 case DataType::Type::kBool:
3162 case DataType::Type::kInt8:
3163 __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3164 break;
3165 case DataType::Type::kUint16:
3166 case DataType::Type::kInt16:
3167 __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3168 break;
3169 case DataType::Type::kInt32:
3170 case DataType::Type::kReference:
3171 __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3172 break;
3173 case DataType::Type::kInt64:
3174 __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3175 break;
3176 default:
3177 LOG(FATAL) << "Unexpected type: " << type;
3178 UNREACHABLE();
3179 }
3180 switch (type) {
3181 case DataType::Type::kInt8:
3182 __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3183 break;
3184 case DataType::Type::kInt16:
3185 __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3186 break;
3187 case DataType::Type::kReference:
3188 assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3189 break;
3190 default:
3191 break;
3192 }
3193 }
3194
3195 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3196 DataType::Type type,
3197 vixl32::Register ptr,
3198 vixl32::Register store_result,
3199 Location new_value) {
3200 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3201 if (type == DataType::Type::kReference) {
3202 assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3203 }
3204 switch (type) {
3205 case DataType::Type::kBool:
3206 case DataType::Type::kInt8:
3207 __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3208 break;
3209 case DataType::Type::kUint16:
3210 case DataType::Type::kInt16:
3211 __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3212 break;
3213 case DataType::Type::kInt32:
3214 case DataType::Type::kReference:
3215 __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3216 break;
3217 case DataType::Type::kInt64:
3218 __ Strexd(
3219 store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3220 break;
3221 default:
3222 LOG(FATAL) << "Unexpected type: " << type;
3223 UNREACHABLE();
3224 }
3225 if (type == DataType::Type::kReference) {
3226 assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3227 }
3228 }
3229
3230 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3231 DataType::Type type,
3232 bool strong,
3233 vixl32::Label* cmp_failure,
3234 bool cmp_failure_is_far_target,
3235 vixl32::Register ptr,
3236 Location expected,
3237 Location new_value,
3238 Location old_value,
3239 vixl32::Register store_result,
3240 vixl32::Register success) {
3241 // For kReference, `expected` shall be a register pair when called from a read barrier
3242 // slow path, specifying both the original `expected` and the unmarked old value from
3243 // the main path's CAS attempt, which matched `expected` only after marking.
3244 // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
3245 if (type == DataType::Type::kInt64) {
3246 DCHECK(expected.IsRegisterPair());
3247 DCHECK(new_value.IsRegisterPair());
3248 DCHECK(old_value.IsRegisterPair());
3249 } else {
3250 DCHECK(expected.IsRegister() ||
3251 (type == DataType::Type::kReference && expected.IsRegisterPair()));
3252 DCHECK(new_value.IsRegister());
3253 DCHECK(old_value.IsRegister());
3254 }
3255
3256 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3257
3258 // do {
3259 // old_value = [ptr]; // Load exclusive.
3260 // if (old_value != expected) goto cmp_failure;
3261 // store_result = failed([ptr] <- new_value); // Store exclusive.
3262 // } while (strong && store_result);
3263 //
3264 // If `success` is a valid register, there are additional instructions in the above code
3265 // to report success with value 1 and failure with value 0 in that register.
3266
3267 vixl32::Label loop_head;
3268 if (strong) {
3269 __ Bind(&loop_head);
3270 }
3271 EmitLoadExclusive(codegen, type, ptr, old_value);
3272 // We do not need to initialize the failure code for comparison failure if the
3273 // branch goes to the read barrier slow path that clobbers `success` anyway.
3274 bool init_failure_for_cmp =
3275 success.IsValid() &&
3276 !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
3277 // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3278 // is essentially free, so prepare the failure value here if we can.
3279 bool init_failure_for_cmp_early =
3280 init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3281 if (init_failure_for_cmp_early) {
3282 __ Mov(success, 0); // Indicate failure if the comparison fails.
3283 }
3284 if (type == DataType::Type::kInt64) {
3285 __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3286 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3287 __ it(eq);
3288 __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3289 } else if (expected.IsRegisterPair()) {
3290 DCHECK_EQ(type, DataType::Type::kReference);
3291 // Check if the loaded value matches any of the two registers in `expected`.
3292 __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3293 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3294 __ it(ne);
3295 __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3296 } else {
3297 __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3298 }
3299 if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3300 __ Mov(LeaveFlags, success, 0); // Indicate failure if the comparison fails.
3301 }
3302 __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3303 EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3304 if (strong) {
3305 // Instruction scheduling: Loading a constant between STREX* and using its result
3306 // is essentially free, so prepare the success value here if needed.
3307 if (success.IsValid()) {
3308 DCHECK(!success.Is(store_result));
3309 __ Mov(success, 1); // Indicate success if the store succeeds.
3310 }
3311 __ Cmp(store_result, 0);
3312 __ B(ne, &loop_head, /*is_far_target=*/ false);
3313 } else {
3314 // Weak CAS (VarHandle.weakCompareAndSet variants) always needs to report success or failure.
3315 DCHECK(success.IsValid());
3316 // Flip the `store_result` to indicate success by 1 and failure by 0.
3317 __ Eor(success, store_result, 1);
3318 }
3319 }
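
// Simplified illustration of the code emitted for a strong kInt32 CAS with a
// valid `success` register (constant moves may be scheduled slightly differently):
//   loop: ldrex   old, [ptr]
//         mov     success, #0
//         cmp     old, expected
//         bne     cmp_failure
//         strex   tmp, new_value, [ptr]
//         mov     success, #1
//         cmp     tmp, #0
//         bne     loop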
3320
3321 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3322 public:
3323 explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3324 bool strong,
3325 vixl32::Register base,
3326 vixl32::Register offset,
3327 vixl32::Register expected,
3328 vixl32::Register new_value,
3329 vixl32::Register old_value,
3330 vixl32::Register old_value_temp,
3331 vixl32::Register store_result,
3332 vixl32::Register success,
3333 CodeGeneratorARMVIXL* arm_codegen)
3334 : SlowPathCodeARMVIXL(invoke),
3335 strong_(strong),
3336 base_(base),
3337 offset_(offset),
3338 expected_(expected),
3339 new_value_(new_value),
3340 old_value_(old_value),
3341 old_value_temp_(old_value_temp),
3342 store_result_(store_result),
3343 success_(success),
3344 mark_old_value_slow_path_(nullptr),
3345 update_old_value_slow_path_(nullptr) {
3346 if (!kUseBakerReadBarrier) {
3347 // We need to add the slow path now, it is too late when emitting slow path code.
3348 mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3349 invoke,
3350 Location::RegisterLocation(old_value_temp.GetCode()),
3351 Location::RegisterLocation(old_value.GetCode()),
3352 Location::RegisterLocation(base.GetCode()),
3353 /*offset=*/ 0u,
3354 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3355 if (!success.IsValid()) {
3356 update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3357 invoke,
3358 Location::RegisterLocation(old_value.GetCode()),
3359 Location::RegisterLocation(old_value_temp.GetCode()),
3360 Location::RegisterLocation(base.GetCode()),
3361 /*offset=*/ 0u,
3362 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3363 }
3364 }
3365 }
3366
3367 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3368
3369 void EmitNativeCode(CodeGenerator* codegen) override {
3370 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3371 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3372 __ Bind(GetEntryLabel());
3373
3374 // Mark the `old_value_` from the main path and compare with `expected_`.
3375 if (kUseBakerReadBarrier) {
3376 DCHECK(mark_old_value_slow_path_ == nullptr);
3377 arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3378 } else {
3379 DCHECK(mark_old_value_slow_path_ != nullptr);
3380 __ B(mark_old_value_slow_path_->GetEntryLabel());
3381 __ Bind(mark_old_value_slow_path_->GetExitLabel());
3382 }
3383 __ Cmp(old_value_temp_, expected_);
3384 if (success_.IsValid()) {
3385 __ Mov(LeaveFlags, success_, 0); // Indicate failure if we take the branch out.
3386 } else {
3387 // In case of failure, update the `old_value_` with the marked reference.
3388 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3389 __ it(ne);
3390 __ mov(ne, old_value_, old_value_temp_);
3391 }
3392 __ B(ne, GetExitLabel());
3393
3394 // The old value we have read did not match `expected` (which is always a to-space
3395 // reference) but after the read barrier the marked to-space value matched, so the
3396 // old value must be a from-space reference to the same object. Do the same CAS loop
3397 // as the main path but check for both `expected` and the unmarked old value
3398 // representing the to-space and from-space references for the same object.
3399
3400 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3401 vixl32::Register tmp_ptr = temps.Acquire();
3402
3403 // Recalculate the `tmp_ptr` clobbered above.
3404 __ Add(tmp_ptr, base_, offset_);
3405
3406 vixl32::Label mark_old_value;
3407 GenerateCompareAndSet(arm_codegen,
3408 DataType::Type::kReference,
3409 strong_,
3410 /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3411 /*cmp_failure_is_far_target=*/ success_.IsValid(),
3412 tmp_ptr,
3413 /*expected=*/ LocationFrom(expected_, old_value_),
3414 /*new_value=*/ LocationFrom(new_value_),
3415 /*old_value=*/ LocationFrom(old_value_temp_),
3416 store_result_,
3417 success_);
3418 if (!success_.IsValid()) {
3419 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3420 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3421 __ Mov(old_value_, expected_);
3422 }
3423
3424 __ B(GetExitLabel());
3425
3426 if (!success_.IsValid()) {
3427 __ Bind(&mark_old_value);
3428 if (kUseBakerReadBarrier) {
3429 DCHECK(update_old_value_slow_path_ == nullptr);
3430 arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3431 } else {
3432 // Note: We could redirect the `failure` above directly to the entry label and bind
3433 // the exit label in the main path, but the main path would need to access the
3434 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3435 DCHECK(update_old_value_slow_path_ != nullptr);
3436 __ B(update_old_value_slow_path_->GetEntryLabel());
3437 __ Bind(update_old_value_slow_path_->GetExitLabel());
3438 }
3439 __ B(GetExitLabel());
3440 }
3441 }
3442
3443 private:
3444 bool strong_;
3445 vixl32::Register base_;
3446 vixl32::Register offset_;
3447 vixl32::Register expected_;
3448 vixl32::Register new_value_;
3449 vixl32::Register old_value_;
3450 vixl32::Register old_value_temp_;
3451 vixl32::Register store_result_;
3452 vixl32::Register success_;
3453 SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3454 SlowPathCodeARMVIXL* update_old_value_slow_path_;
3455 };
3456
3457 static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3458 bool can_call = kEmitCompilerReadBarrier &&
3459 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
3460 LocationSummary* locations =
3461 new (allocator) LocationSummary(invoke,
3462 can_call
3463 ? LocationSummary::kCallOnSlowPath
3464 : LocationSummary::kNoCall,
3465 kIntrinsified);
3466 if (can_call && kUseBakerReadBarrier) {
3467 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3468 }
3469 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3470 locations->SetInAt(1, Location::RequiresRegister());
3471 locations->SetInAt(2, Location::RequiresRegister());
3472 locations->SetInAt(3, Location::RequiresRegister());
3473 locations->SetInAt(4, Location::RequiresRegister());
3474
3475 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3476
3477 // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3478 // this is also used for card-marking, and possibly for read barrier.
3479 locations->AddTemp(Location::RequiresRegister());
3480 }
3481
3482 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3483 DCHECK_NE(type, DataType::Type::kInt64);
3484
3485 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3486 LocationSummary* locations = invoke->GetLocations();
3487
3488 vixl32::Register out = OutputRegister(invoke); // Boolean result.
3489 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
3490 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
3491 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
3492 vixl32::Register new_value = InputRegisterAt(invoke, 4); // New value.
3493
3494 vixl32::Register tmp = RegisterFrom(locations->GetTemp(0)); // Temporary.
3495
3496 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3497 vixl32::Register tmp_ptr = temps.Acquire();
3498
3499 if (type == DataType::Type::kReference) {
3500 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3501 // object and scan the receiver at the next GC for nothing.
3502 bool value_can_be_null = true; // TODO: Worth finding out this information?
3503 codegen->MarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3504 }
3505
3506 vixl32::Label exit_loop_label;
3507 vixl32::Label* exit_loop = &exit_loop_label;
3508 vixl32::Label* cmp_failure = &exit_loop_label;
3509
3510 if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
3511 // If marking, check if the stored reference is a from-space reference to the same
3512 // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3513 ReadBarrierCasSlowPathARMVIXL* slow_path =
3514 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3515 invoke,
3516 /*strong=*/ true,
3517 base,
3518 offset,
3519 expected,
3520 new_value,
3521 /*old_value=*/ tmp,
3522 /*old_value_temp=*/ out,
3523 /*store_result=*/ tmp,
3524 /*success=*/ out,
3525 codegen);
3526 codegen->AddSlowPath(slow_path);
3527 exit_loop = slow_path->GetExitLabel();
3528 cmp_failure = slow_path->GetEntryLabel();
3529 }
3530
3531 // Unsafe CAS operations have std::memory_order_seq_cst semantics.
3532 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3533 __ Add(tmp_ptr, base, offset);
3534 GenerateCompareAndSet(codegen,
3535 type,
3536 /*strong=*/ true,
3537 cmp_failure,
3538 /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3539 tmp_ptr,
3540 /*expected=*/ LocationFrom(expected), // TODO: Int64
3541 /*new_value=*/ LocationFrom(new_value), // TODO: Int64
3542 /*old_value=*/ LocationFrom(tmp), // TODO: Int64
3543 /*store_result=*/ tmp,
3544 /*success=*/ out);
3545 __ Bind(exit_loop);
3546 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3547
3548 if (type == DataType::Type::kReference) {
3549 codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3550 }
3551 }
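
// Illustrative Java-level contract implemented above (sun.misc.Unsafe):
//   boolean compareAndSwapInt(Object o, long offset, int expected, int x);
//   boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
// i.e. a strong, sequentially consistent CAS returning a success flag, which is
// why full barriers bracket the GenerateCompareAndSet() loop above.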
3552
3553 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3554 CreateUnsafeCASLocations(allocator_, invoke);
3555 }
3556 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3557 // The only read barrier implementation supporting the
3558 // UnsafeCASObject intrinsic is the Baker-style read barriers. b/173104084
3559 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3560 return;
3561 }
3562
3563 CreateUnsafeCASLocations(allocator_, invoke);
3564 }
3565 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3566 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3567 }
3568 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3569 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3570 }
3571
3572 enum class GetAndUpdateOp {
3573 kSet,
3574 kAdd,
3575 kAddWithByteSwap,
3576 kAnd,
3577 kOr,
3578 kXor
3579 };
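
// Rough mapping of GetAndUpdateOp values to the Unsafe/VarHandle operations they
// implement (illustrative): kSet -> getAndSet, kAdd -> getAndAdd, kAnd/kOr/kXor ->
// getAndBitwiseAnd/Or/Xor. kAddWithByteSwap backs getAndAdd on a byte-array view
// with non-native byte order, where the value is byte-reversed around the addition.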
3580
3581 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3582 GetAndUpdateOp get_and_update_op,
3583 DataType::Type load_store_type,
3584 vixl32::Register ptr,
3585 Location arg,
3586 Location old_value,
3587 vixl32::Register store_result,
3588 Location maybe_temp,
3589 Location maybe_vreg_temp) {
3590 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3591
3592 Location loaded_value;
3593 Location new_value;
3594 switch (get_and_update_op) {
3595 case GetAndUpdateOp::kSet:
3596 loaded_value = old_value;
3597 new_value = arg;
3598 break;
3599 case GetAndUpdateOp::kAddWithByteSwap:
3600 if (old_value.IsRegisterPair()) {
3601 // To avoid register overlap when reversing bytes, load into temps.
3602 DCHECK(maybe_temp.IsRegisterPair());
3603 loaded_value = maybe_temp;
3604 new_value = loaded_value; // Use the same temporaries for the new value.
3605 break;
3606 }
3607 FALLTHROUGH_INTENDED;
3608 case GetAndUpdateOp::kAdd:
3609 if (old_value.IsFpuRegisterPair()) {
3610 DCHECK(maybe_temp.IsRegisterPair());
3611 loaded_value = maybe_temp;
3612 new_value = loaded_value; // Use the same temporaries for the new value.
3613 break;
3614 }
3615 if (old_value.IsFpuRegister()) {
3616 DCHECK(maybe_temp.IsRegister());
3617 loaded_value = maybe_temp;
3618 new_value = loaded_value; // Use the same temporary for the new value.
3619 break;
3620 }
3621 FALLTHROUGH_INTENDED;
3622 case GetAndUpdateOp::kAnd:
3623 case GetAndUpdateOp::kOr:
3624 case GetAndUpdateOp::kXor:
3625 loaded_value = old_value;
3626 new_value = maybe_temp;
3627 break;
3628 }
3629
3630 vixl32::Label loop_label;
3631 __ Bind(&loop_label);
3632 EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3633 switch (get_and_update_op) {
3634 case GetAndUpdateOp::kSet:
3635 break;
3636 case GetAndUpdateOp::kAddWithByteSwap:
3637 if (arg.IsFpuRegisterPair()) {
3638 GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3639 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3640 __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3641 __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum); // Swap low/high.
3642 } else if (arg.IsFpuRegister()) {
3643 GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3644 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
3645 __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3646 __ Vmov(RegisterFrom(new_value), sum);
3647 } else if (load_store_type == DataType::Type::kInt64) {
3648 GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3649 // Swap low/high registers for the addition results.
3650 __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3651 __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3652 } else {
3653 GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3654 __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3655 }
3656 if (load_store_type == DataType::Type::kInt64) {
3657 // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3658 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3659 } else {
3660 GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3661 }
3662 break;
3663 case GetAndUpdateOp::kAdd:
3664 if (arg.IsFpuRegisterPair()) {
3665 vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3666 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3667 __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3668 __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
3669 __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
3670 } else if (arg.IsFpuRegister()) {
3671 vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
3672 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
3673 __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
3674 __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
3675 __ Vmov(RegisterFrom(new_value), sum);
3676 } else if (load_store_type == DataType::Type::kInt64) {
3677 __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3678 __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3679 } else {
3680 __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3681 }
3682 break;
3683 case GetAndUpdateOp::kAnd:
3684 if (load_store_type == DataType::Type::kInt64) {
3685 __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3686 __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3687 } else {
3688 __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3689 }
3690 break;
3691 case GetAndUpdateOp::kOr:
3692 if (load_store_type == DataType::Type::kInt64) {
3693 __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3694 __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3695 } else {
3696 __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3697 }
3698 break;
3699 case GetAndUpdateOp::kXor:
3700 if (load_store_type == DataType::Type::kInt64) {
3701 __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3702 __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3703 } else {
3704 __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3705 }
3706 break;
3707 }
3708 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
3709 __ Cmp(store_result, 0);
3710 __ B(ne, &loop_label);
3711 }
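
// Simplified illustration for GetAndUpdateOp::kAdd on kInt32 (getAndAddInt-style):
//   loop: ldrex old, [ptr]
//         add   new, old, arg
//         strex tmp, new, [ptr]
//         cmp   tmp, #0
//         bne   loop
// `old` holds the returned previous value; the FP and byte-swapping variants only
// insert extra VMOV/byte-reverse steps between the exclusive load and store.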
3712
3713 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
3714 public:
3715 VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
3716 : IntrinsicSlowPathARMVIXL(invoke),
3717 order_(order),
3718 atomic_(false),
3719 return_success_(false),
3720 strong_(false),
3721 get_and_update_op_(GetAndUpdateOp::kAdd) {
3722 }
3723
3724 vixl32::Label* GetByteArrayViewCheckLabel() {
3725 return &byte_array_view_check_label_;
3726 }
3727
3728 vixl32::Label* GetNativeByteOrderLabel() {
3729 return &native_byte_order_label_;
3730 }
3731
3732 void SetAtomic(bool atomic) {
3733 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
3734 GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
3735 atomic_ = atomic;
3736 }
3737
3738 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3739 if (return_success) {
3740 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3741 } else {
3742 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3743 }
3744 return_success_ = return_success;
3745 strong_ = strong;
3746 }
3747
3748 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3749 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3750 get_and_update_op_ = get_and_update_op;
3751 }
3752
3753 void EmitNativeCode(CodeGenerator* codegen_in) override {
3754 if (GetByteArrayViewCheckLabel()->IsReferenced()) {
3755 EmitByteArrayViewCode(codegen_in);
3756 }
3757 IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
3758 }
3759
3760 private:
3761 HInvoke* GetInvoke() const {
3762 return GetInstruction()->AsInvoke();
3763 }
3764
3765 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3766 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3767 }
3768
3769 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3770
3771 vixl32::Label byte_array_view_check_label_;
3772 vixl32::Label native_byte_order_label_;
3773 // Shared parameter for all VarHandle intrinsics.
3774 std::memory_order order_;
3775 // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
3776 bool atomic_;
3777 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3778 bool return_success_;
3779 bool strong_;
3780 // Extra argument for GenerateVarHandleGetAndUpdate().
3781 GetAndUpdateOp get_and_update_op_;
3782 };
3783
3784 // Generate subtype check without read barriers.
3785 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
3786 SlowPathCodeARMVIXL* slow_path,
3787 vixl32::Register object,
3788 vixl32::Register type,
3789 bool object_can_be_null = true) {
3790 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3791
3792 const MemberOffset class_offset = mirror::Object::ClassOffset();
3793 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3794
3795 vixl32::Label success;
3796 if (object_can_be_null) {
3797 __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
3798 }
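// The loop below walks the superclass chain without read barriers. Roughly:
//   k = object->klass;
//   do { if (k == type) goto success; k = k->super_class; } while (k != null);
//   goto slow_path;  // Possible false negative, resolved by the slow path.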
3799
3800 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3801 vixl32::Register temp = temps.Acquire();
3802
3803 __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
3804 assembler->MaybeUnpoisonHeapReference(temp);
3805 vixl32::Label loop;
3806 __ Bind(&loop);
3807 __ Cmp(type, temp);
3808 __ B(eq, &success, /*is_far_target=*/ false);
3809 __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
3810 assembler->MaybeUnpoisonHeapReference(temp);
3811 __ Cmp(temp, 0);
3812 __ B(eq, slow_path->GetEntryLabel());
3813 __ B(&loop);
3814 __ Bind(&success);
3815 }
3816
3817 // Check access mode and the primitive type from VarHandle.varType.
3818 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3819 // check without a read barrier, so it can have false negatives, which we handle in the slow path.
3820 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3821 CodeGeneratorARMVIXL* codegen,
3822 SlowPathCodeARMVIXL* slow_path,
3823 DataType::Type type) {
3824 mirror::VarHandle::AccessMode access_mode =
3825 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3826 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3827
3828 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3829 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3830
3831 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3832 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3833 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3834
3835 // Use the temporary register reserved for offset. It is not used yet at this point.
3836 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3837 vixl32::Register var_type_no_rb =
3838 RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
3839
3840 // Check that the operation is permitted and the primitive type of varhandle.varType.
3841 // We do not need a read barrier when loading a reference only to read a constant
3842 // primitive field through that reference. Use LDRD to load the two fields together.
3843 {
3844 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3845 vixl32::Register temp2 = temps.Acquire();
3846 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
3847 __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
3848 assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
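// The bit corresponding to `access_mode` must be set in the access modes bit mask;
// TST yields `eq` when that bit is clear, i.e. the access mode is not supported.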
3849 __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
3850 __ B(eq, slow_path->GetEntryLabel());
3851 __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
3852 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
3853 __ B(ne, slow_path->GetEntryLabel());
3854 }
3855
3856 if (type == DataType::Type::kReference) {
3857 // Check reference arguments against the varType.
3858 // False negatives due to varType being an interface or array type
3859 // or due to the missing read barrier are handled by the slow path.
3860 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3861 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3862 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3863 HInstruction* arg = invoke->InputAt(arg_index);
3864 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3865 if (!arg->IsNullConstant()) {
3866 vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
3867 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
3868 }
3869 }
3870 }
3871 }
3872
3873 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3874 CodeGeneratorARMVIXL* codegen,
3875 SlowPathCodeARMVIXL* slow_path) {
3876 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3877 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3878
3879 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3880
3881 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3882 vixl32::Register temp = temps.Acquire();
3883
3884 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3885 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3886 __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3887 __ Cmp(temp, 0);
3888 __ B(ne, slow_path->GetEntryLabel());
3889 }
3890
3891 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3892 CodeGeneratorARMVIXL* codegen,
3893 SlowPathCodeARMVIXL* slow_path) {
3894 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3895 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3896 vixl32::Register object = InputRegisterAt(invoke, 1);
3897
3898 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3899 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3900
3901 // Null-check the object.
3902 __ Cmp(object, 0);
3903 __ B(eq, slow_path->GetEntryLabel());
3904
3905 // Use the first temporary register, whether it's for the declaring class or the offset.
3906 // It is not used yet at this point.
3907 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
3908
3909 // Check that the VarHandle references an instance field by checking that
3910 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3911 // type compatibility check with the source object's type, which will fail for null.
3912 {
3913 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3914 vixl32::Register temp2 = temps.Acquire();
3915 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
3916 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3917 assembler->MaybeUnpoisonHeapReference(temp);
3918 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3919 __ Cmp(temp2, 0);
3920 __ B(ne, slow_path->GetEntryLabel());
3921 }
3922
3923 // Check that the object has the correct type.
3924 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3925 GenerateSubTypeObjectCheckNoReadBarrier(
3926 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
3927 }
3928
3929 static DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
3930 size_t expected_coordinates_count) {
3931 DCHECK_EQ(expected_coordinates_count, GetExpectedVarHandleCoordinatesCount(invoke));
3932 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3933 DCHECK_GE(number_of_arguments, /* VarHandle object */ 1u + expected_coordinates_count);
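// With no value arguments (e.g. a plain get()), the value type is the return type;
// otherwise it is the type of the last argument, e.g. kInt32 for
// compareAndSet(obj, intExpected, intNew), which itself returns a boolean.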
3934 if (number_of_arguments == /* VarHandle object */ 1u + expected_coordinates_count) {
3935 return invoke->GetType();
3936 } else {
3937 return GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
3938 }
3939 }
3940
3941 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3942 CodeGeneratorARMVIXL* codegen,
3943 VarHandleSlowPathARMVIXL* slow_path) {
3944 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3945 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3946 vixl32::Register object = InputRegisterAt(invoke, 1);
3947 vixl32::Register index = InputRegisterAt(invoke, 2);
3948 DataType::Type value_type =
3949 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3950 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3951
3952 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3953 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3954 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3955 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3956 const MemberOffset class_offset = mirror::Object::ClassOffset();
3957 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3958
3959 // Null-check the object.
3960 __ Cmp(object, 0);
3961 __ B(eq, slow_path->GetEntryLabel());
3962
3963 // Use the offset temporary register. It is not used yet at this point.
3964 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
3965
3966 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3967 vixl32::Register temp2 = temps.Acquire();
3968
3969 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3970 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3971 // coordinateType0 shall not be null but we do not explicitly verify that.
3972 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
3973 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3974 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3975 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3976 __ Cmp(temp2, 0);
3977 __ B(eq, slow_path->GetEntryLabel());
3978
3979 // Check object class against componentType0.
3980 //
3981 // This is an exact check and we defer other cases to the runtime. This includes
3982 // conversion to array of superclass references, which is valid but subsequently
3983 // requires all update operations to check that the value can indeed be stored.
3984 // We do not want to perform such extra checks in the intrinsified code.
3985 //
3986 // We do this check without read barrier, so there can be false negatives which we
3987 // defer to the slow path. There shall be no false negatives for array classes in the
3988 // boot image (including Object[] and primitive arrays) because they are non-movable.
3989 __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
3990 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3991 __ Cmp(temp, temp2);
3992 __ B(ne, slow_path->GetEntryLabel());
3993
3994 // Check that the coordinateType0 is an array type. We do not need a read barrier
3995 // for loading constant reference fields (or chains of them) for comparison with null,
3996 // nor for finally loading a constant primitive field (primitive type) below.
3997 __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
3998 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3999 __ Cmp(temp2, 0);
4000 __ B(eq, slow_path->GetEntryLabel());
4001
4002 // Check that the array component type matches the primitive type.
4003 // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4004 // we shall check for a byte array view in the slow path.
4005 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4006 // so we cannot emit that check when JITting without a boot image.
4007 bool boot_image_available =
4008 codegen->GetCompilerOptions().IsBootImage() ||
4009 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4010 DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
4011 bool can_be_view =
4012 ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4013 boot_image_available;
4014 vixl32::Label* slow_path_label =
4015 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4016 __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4017 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4018 __ B(ne, slow_path_label);
4019
4020 // Check for array index out of bounds.
4021 __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4022 __ Cmp(index, temp);
4023 __ B(hs, slow_path->GetEntryLabel());
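// Note: the unsigned `hs` comparison also rejects negative indexes, which appear as
// large unsigned values.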
4024 }
4025
4026 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4027 CodeGeneratorARMVIXL* codegen,
4028 VarHandleSlowPathARMVIXL* slow_path) {
4029 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4030 if (expected_coordinates_count == 0u) {
4031 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4032 } else if (expected_coordinates_count == 1u) {
4033 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4034 } else {
4035 DCHECK_EQ(expected_coordinates_count, 2u);
4036 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4037 }
4038 }
4039
4040 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4041 CodeGeneratorARMVIXL* codegen,
4042 std::memory_order order,
4043 DataType::Type type) {
4044 VarHandleSlowPathARMVIXL* slow_path =
4045 new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4046 codegen->AddSlowPath(slow_path);
4047
4048 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4049 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4050
4051 return slow_path;
4052 }
4053
4054 struct VarHandleTarget {
4055 vixl32::Register object; // The object holding the value to operate on.
4056 vixl32::Register offset; // The offset of the value to operate on.
4057 };
4058
4059 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4060 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4061 LocationSummary* locations = invoke->GetLocations();
4062
4063 VarHandleTarget target;
4064 // The temporary allocated for loading the offset.
4065 target.offset = RegisterFrom(locations->GetTemp(0u));
4066 // The reference to the object that holds the value to operate on.
4067 target.object = (expected_coordinates_count == 0u)
4068 ? RegisterFrom(locations->GetTemp(1u))
4069 : InputRegisterAt(invoke, 1);
4070 return target;
4071 }
4072
4073 static void GenerateVarHandleTarget(HInvoke* invoke,
4074 const VarHandleTarget& target,
4075 CodeGeneratorARMVIXL* codegen) {
4076 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4077 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4078 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4079
4080 if (expected_coordinates_count <= 1u) {
4081 // For static fields, we need to fill the `target.object` with the declaring class,
4082 // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4083 // we do not need the declaring class, so we can forget the `ArtField*` once we have
4084 // loaded the `target.offset`; thus `target.offset` holds the `ArtField*` (named `method` below).
4085 vixl32::Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
4086
4087 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4088 const MemberOffset offset_offset = ArtField::OffsetOffset();
4089
4090 // Load the ArtField, the offset and, if needed, declaring class.
4091 __ Ldr(method, MemOperand(varhandle, art_field_offset.Int32Value()));
4092 __ Ldr(target.offset, MemOperand(method, offset_offset.Int32Value()));
4093 if (expected_coordinates_count == 0u) {
4094 codegen->GenerateGcRootFieldLoad(invoke,
4095 LocationFrom(target.object),
4096 method,
4097 ArtField::DeclaringClassOffset().Int32Value(),
4098 kCompilerReadBarrierOption);
4099 }
4100 } else {
4101 DCHECK_EQ(expected_coordinates_count, 2u);
4102 DataType::Type value_type =
4103 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4104 uint32_t size_shift = DataType::SizeShift(value_type);
4105 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4106
4107 vixl32::Register index = InputRegisterAt(invoke, 2);
4108 vixl32::Register shifted_index = index;
4109 if (size_shift != 0u) {
4110 shifted_index = target.offset;
4111 __ Lsl(shifted_index, index, size_shift);
4112 }
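// Fold the array data offset into the offset; the accessors then form the element
// address as `object + offset`, e.g. data_offset + (index << 2) for an `int[]`.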
4113 __ Add(target.offset, shifted_index, data_offset.Int32Value());
4114 }
4115 }
4116
4117 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
4118 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4119 if (expected_coordinates_count > 2u) {
4120 // Invalid coordinate count. This invoke shall throw at runtime.
4121 return false;
4122 }
4123 if (expected_coordinates_count != 0u &&
4124 invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
4125 // Except for static fields (no coordinates), the first coordinate must be a reference.
4126 return false;
4127 }
4128 if (expected_coordinates_count == 2u) {
4129 // For arrays and views, the second coordinate must be convertible to `int`.
4130 // In this context, `boolean` is not convertible but we have to look at the shorty
4131 // as compiler transformations can give the invoke a valid boolean input.
4132 DataType::Type index_type = GetDataTypeFromShorty(invoke, 2);
4133 if (index_type == DataType::Type::kBool ||
4134 DataType::Kind(index_type) != DataType::Type::kInt32) {
4135 return false;
4136 }
4137 }
4138
4139 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4140 DataType::Type return_type = invoke->GetType();
4141 mirror::VarHandle::AccessModeTemplate access_mode_template =
4142 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4143 switch (access_mode_template) {
4144 case mirror::VarHandle::AccessModeTemplate::kGet:
4145 // The return type should be the same as varType, so it shouldn't be void.
4146 if (return_type == DataType::Type::kVoid) {
4147 return false;
4148 }
4149 break;
4150 case mirror::VarHandle::AccessModeTemplate::kSet:
4151 if (return_type != DataType::Type::kVoid) {
4152 return false;
4153 }
4154 break;
4155 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
4156 if (return_type != DataType::Type::kBool) {
4157 return false;
4158 }
4159 uint32_t expected_value_index = number_of_arguments - 2;
4160 uint32_t new_value_index = number_of_arguments - 1;
4161 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4162 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4163 if (expected_value_type != new_value_type) {
4164 return false;
4165 }
4166 break;
4167 }
4168 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4169 uint32_t expected_value_index = number_of_arguments - 2;
4170 uint32_t new_value_index = number_of_arguments - 1;
4171 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4172 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4173 if (expected_value_type != new_value_type || return_type != expected_value_type) {
4174 return false;
4175 }
4176 break;
4177 }
4178 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4179 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
4180 if (IsVarHandleGetAndAdd(invoke) &&
4181 (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
4182 // We should only add numerical types.
4183 return false;
4184 } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
4185 // We can only apply operators to bitwise integral types.
4186 // Note that bitwise VarHandle operations accept a non-integral boolean type and
4187 // perform the appropriate logical operation. However, the result is the same as
4188 // using the bitwise operation on our boolean representation and this fits well
4189 // with DataType::IsIntegralType() treating the compiler type kBool as integral.
4190 return false;
4191 }
4192 if (value_type != return_type) {
4193 return false;
4194 }
4195 break;
4196 }
4197 }
4198
4199 return true;
4200 }
4201
4202 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4203 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4204 DataType::Type return_type = invoke->GetType();
4205
4206 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4207 LocationSummary* locations =
4208 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4209 locations->SetInAt(0, Location::RequiresRegister());
4210 // Require coordinates in registers. These are the object holding the value
4211 // to operate on (except for static fields) and index (for arrays and views).
4212 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4213 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4214 }
4215 if (return_type != DataType::Type::kVoid) {
4216 if (DataType::IsFloatingPointType(return_type)) {
4217 locations->SetOut(Location::RequiresFpuRegister());
4218 } else {
4219 locations->SetOut(Location::RequiresRegister());
4220 }
4221 }
4222 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4223 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4224 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4225 HInstruction* arg = invoke->InputAt(arg_index);
4226 if (DataType::IsFloatingPointType(arg->GetType())) {
4227 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4228 } else {
4229 locations->SetInAt(arg_index, Location::RequiresRegister());
4230 }
4231 }
4232
4233 // Add a temporary for offset.
4234 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4235 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4236 // To preserve the offset value across the non-Baker read barrier slow path
4237 // for loading the declaring class, use a fixed callee-save register.
4238 constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
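// CTZ picks the lowest-numbered callee-save core register in the mask; being
// callee-save, its value survives the runtime call in the read barrier slow path.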
4239 locations->AddTemp(Location::RegisterLocation(first_callee_save));
4240 } else {
4241 locations->AddTemp(Location::RequiresRegister());
4242 }
4243 if (expected_coordinates_count == 0u) {
4244 // Add a temporary to hold the declaring class.
4245 locations->AddTemp(Location::RequiresRegister());
4246 }
4247
4248 return locations;
4249 }
4250
4251 static void CreateVarHandleGetLocations(HInvoke* invoke,
4252 CodeGeneratorARMVIXL* codegen,
4253 bool atomic) {
4254 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4255 return;
4256 }
4257
4258 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4259 invoke->GetType() == DataType::Type::kReference &&
4260 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4261 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4262 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4263 // the passed reference and reloads it from the field. This gets the memory visibility
4264 // wrong for Acquire/Volatile operations. b/173104084
4265 return;
4266 }
4267
4268 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4269
4270 DataType::Type type = invoke->GetType();
4271 if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4272 // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4273 // declaring class (if present) and offset temporary.
4274 DCHECK_EQ(locations->GetTempCount(),
4275 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4276 locations->AddRegisterTemps(3u - locations->GetTempCount());
4277 }
4278 }
4279
4280 static void GenerateVarHandleGet(HInvoke* invoke,
4281 CodeGeneratorARMVIXL* codegen,
4282 std::memory_order order,
4283 bool atomic,
4284 bool byte_swap = false) {
4285 DataType::Type type = invoke->GetType();
4286 DCHECK_NE(type, DataType::Type::kVoid);
4287
4288 LocationSummary* locations = invoke->GetLocations();
4289 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4290 Location out = locations->Out();
4291
4292 VarHandleTarget target = GetVarHandleTarget(invoke);
4293 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4294 if (!byte_swap) {
4295 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4296 slow_path->SetAtomic(atomic);
4297 GenerateVarHandleTarget(invoke, target, codegen);
4298 __ Bind(slow_path->GetNativeByteOrderLabel());
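// The byte array view check in the slow path branches back to this label when the
// view already uses the native byte order, so the code below needs no byte swap.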
4299 }
4300
4301 Location maybe_temp = Location::NoLocation();
4302 Location maybe_temp2 = Location::NoLocation();
4303 Location maybe_temp3 = Location::NoLocation();
4304 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
4305 // Reuse the offset temporary.
4306 maybe_temp = LocationFrom(target.offset);
4307 } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4308 // Reuse the offset temporary and declaring class (if present).
4309 // The address shall be constructed in the scratch register before they are clobbered.
4310 maybe_temp = LocationFrom(target.offset);
4311 DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4312 if (type == DataType::Type::kFloat64) {
4313 maybe_temp2 = locations->GetTemp(1);
4314 maybe_temp3 = locations->GetTemp(2);
4315 }
4316 }
4317
4318 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4319 Location loaded_value = out;
4320 DataType::Type load_type = type;
4321 if (byte_swap) {
4322 if (type == DataType::Type::kFloat64) {
4323 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4324 // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4325 loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4326 maybe_temp2 = Location::NoLocation();
4327 maybe_temp3 = Location::NoLocation();
4328 } else {
4329 // Use the offset temporary and the scratch register.
4330 loaded_value = LocationFrom(target.offset, temps.Acquire());
4331 }
4332 load_type = DataType::Type::kInt64;
4333 } else if (type == DataType::Type::kFloat32) {
4334 // Reuse the offset temporary.
4335 loaded_value = LocationFrom(target.offset);
4336 load_type = DataType::Type::kInt32;
4337 } else if (type == DataType::Type::kInt64) {
4338 // Swap the high and low registers and reverse the bytes in each after the load.
4339 loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4340 }
4341 }
4342
4343 GenerateIntrinsicGet(invoke,
4344 codegen,
4345 load_type,
4346 order,
4347 atomic,
4348 target.object,
4349 target.offset,
4350 loaded_value,
4351 maybe_temp,
4352 maybe_temp2,
4353 maybe_temp3);
4354 if (byte_swap) {
4355 if (type == DataType::Type::kInt64) {
4356 GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4357 } else {
4358 GenerateReverseBytes(assembler, type, loaded_value, out);
4359 }
4360 }
4361
4362 if (!byte_swap) {
4363 __ Bind(slow_path->GetExitLabel());
4364 }
4365 }
4366
4367 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4368 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4369 }
4370
4371 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4372 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4373 }
4374
4375 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4376 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4377 }
4378
4379 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4380 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4381 }
4382
4383 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4384 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4385 }
4386
4387 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4388 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4389 }
4390
4391 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4392 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4393 }
4394
4395 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4396 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4397 }
4398
4399 static void CreateVarHandleSetLocations(HInvoke* invoke,
4400 CodeGeneratorARMVIXL* codegen,
4401 bool atomic) {
4402 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4403 return;
4404 }
4405
4406 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4407
4408 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4409 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4410 if (DataType::Is64BitType(value_type)) {
4411 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4412 DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4413 HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4414 bool has_reverse_bytes_slow_path =
4415 (expected_coordinates_count == 2u) &&
4416 !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern());
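// Storing a zero bit pattern needs no byte-reversing temps; zero reads the same in
// either byte order.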
4417 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4418 // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4419 // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4420 // We can reuse the offset temporary and declaring class (if present).
4421 size_t temps_needed = has_reverse_bytes_slow_path
4422 ? 4u
4423 : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4424 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4425 } else if (has_reverse_bytes_slow_path) {
4426 // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4427 // We can reuse the offset temporary.
4428 DCHECK_EQ(locations->GetTempCount(), 1u);
4429 locations->AddTemp(Location::RequiresRegister());
4430 }
4431 }
4432 }
4433
4434 static void GenerateVarHandleSet(HInvoke* invoke,
4435 CodeGeneratorARMVIXL* codegen,
4436 std::memory_order order,
4437 bool atomic,
4438 bool byte_swap = false) {
4439 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4440 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4441
4442 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4443 LocationSummary* locations = invoke->GetLocations();
4444 Location value = locations->InAt(value_index);
4445
4446 VarHandleTarget target = GetVarHandleTarget(invoke);
4447 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4448 if (!byte_swap) {
4449 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4450 slow_path->SetAtomic(atomic);
4451 GenerateVarHandleTarget(invoke, target, codegen);
4452 __ Bind(slow_path->GetNativeByteOrderLabel());
4453 }
4454
4455 Location maybe_temp = Location::NoLocation();
4456 Location maybe_temp2 = Location::NoLocation();
4457 Location maybe_temp3 = Location::NoLocation();
4458 if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4459 // Reuse the offset temporary and declaring class (if present).
4460 // The address shall be constructed in the scratch register before they are clobbered.
4461 maybe_temp = locations->GetTemp(0);
4462 maybe_temp2 = locations->GetTemp(1);
4463 if (value_type == DataType::Type::kFloat64) {
4464 maybe_temp3 = locations->GetTemp(2);
4465 }
4466 }
4467
4468 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4469 if (byte_swap) {
4470 if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4471 // Calculate the address in scratch register, so that we can use the offset temporary.
4472 vixl32::Register base = temps.Acquire();
4473 __ Add(base, target.object, target.offset);
4474 target.object = base;
4475 target.offset = vixl32::Register();
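// The full address is now in `target.object`; leave `target.offset` invalid so the
// offset is not added again below.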
4476 }
4477 Location original_value = value;
4478 if (DataType::Is64BitType(value_type)) {
4479 size_t temp_start = 0u;
4480 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4481 // Clear `maybe_temp3` which was initialized above for Float64.
4482 DCHECK(value_type != DataType::Type::kFloat64 || maybe_temp3.Equals(locations->GetTemp(2)));
4483 maybe_temp3 = Location::NoLocation();
4484 temp_start = 2u;
4485 }
4486 value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4487 RegisterFrom(locations->GetTemp(temp_start + 1u)));
4488 if (value_type == DataType::Type::kFloat64) {
4489 __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4490 GenerateReverseBytesInPlaceForEachWord(assembler, value);
4491 value_type = DataType::Type::kInt64;
4492 } else {
4493 GenerateReverseBytes(assembler, value_type, original_value, value);
4494 }
4495 } else if (value_type == DataType::Type::kFloat32) {
4496 value = locations->GetTemp(0); // Use the offset temporary which was freed above.
4497 __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4498 GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4499 value_type = DataType::Type::kInt32;
4500 } else {
4501 value = LocationFrom(temps.Acquire());
4502 GenerateReverseBytes(assembler, value_type, original_value, value);
4503 }
4504 }
4505
4506 GenerateIntrinsicSet(codegen,
4507 value_type,
4508 order,
4509 atomic,
4510 target.object,
4511 target.offset,
4512 value,
4513 maybe_temp,
4514 maybe_temp2,
4515 maybe_temp3);
4516
4517 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4518 // Reuse the offset temporary for MarkGCCard.
4519 vixl32::Register temp = target.offset;
4520 vixl32::Register card = temps.Acquire();
4521 vixl32::Register value_reg = RegisterFrom(value);
4522 codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true);
4523 }
4524
4525 if (!byte_swap) {
4526 __ Bind(slow_path->GetExitLabel());
4527 }
4528 }
4529
4530 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4531 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4532 }
4533
4534 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4535 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4536 }
4537
4538 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4539 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4540 }
4541
4542 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4543 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4544 }
4545
4546 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4547 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4548 }
4549
4550 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4551 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
4552 }
4553
4554 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4555 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4556 }
4557
4558 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4559 // ARM store-release instructions are implicitly sequentially consistent.
4560 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4561 }
4562
4563 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
4564 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4565 return;
4566 }
4567
4568 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4569 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4570 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4571 value_type == DataType::Type::kReference) {
4572 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4573 // the passed reference and reloads it from the field. This breaks the read barriers
4574 // in slow path in different ways. The marked old value may not actually be a to-space
4575 // reference to the same object as `old_value`, breaking slow path assumptions. And
4576 // for CompareAndExchange, marking the old value after comparison failure may actually
4577 // return the reference to `expected`, erroneously indicating success even though we
4578 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4579 return;
4580 }
4581
4582 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4583
4584 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4585 // We need callee-save registers for both the class object and offset instead of
4586 // the temporaries reserved in CreateVarHandleCommonLocations().
4587 static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
4588 constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4589 constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
4590 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4591 DCHECK_EQ(locations->GetTempCount(), 2u);
4592 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4593 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4594 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4595 } else {
4596 DCHECK_EQ(locations->GetTempCount(), 1u);
4597 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4598 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4599 }
4600 }
4601
4602 if (DataType::IsFloatingPointType(value_type)) {
4603 // We can reuse the declaring class (if present) and offset temporary.
4604 DCHECK_EQ(locations->GetTempCount(),
4605 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4606 size_t temps_needed = (value_type == DataType::Type::kFloat64)
4607 ? (return_success ? 5u : 7u)
4608 : (return_success ? 3u : 4u);
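// Float64: a register pair each for `expected` and `new_value` plus the store result
// (5), and another pair for the old value when exchanging (7). Float32: one register
// each plus the store result (3), and one more for the old value when exchanging (4).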
4609 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4610 } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4611 // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
4612 DCHECK_EQ(locations->GetTempCount(), 1u);
4613 if (value_type == DataType::Type::kInt64) {
4614 // We would ideally add 4 temps for Int64 but that would simply run out of registers,
4615 // so we instead need to reverse bytes in the actual arguments and undo it at the end.
4616 } else {
4617 locations->AddRegisterTemps(2u);
4618 }
4619 }
4620 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4621 // Add a temporary for store result, also used for the `old_value_temp` in slow path.
4622 locations->AddTemp(Location::RequiresRegister());
4623 }
4624 }
4625
4626 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4627 CodeGeneratorARMVIXL* codegen,
4628 std::memory_order order,
4629 bool return_success,
4630 bool strong,
4631 bool byte_swap = false) {
4632 DCHECK(return_success || strong);
4633
4634 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4635 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4636 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4637 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4638
4639 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4640 LocationSummary* locations = invoke->GetLocations();
4641 Location expected = locations->InAt(expected_index);
4642 Location new_value = locations->InAt(new_value_index);
4643 Location out = locations->Out();
4644
4645 VarHandleTarget target = GetVarHandleTarget(invoke);
4646 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4647 if (!byte_swap) {
4648 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4649 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4650 GenerateVarHandleTarget(invoke, target, codegen);
4651 __ Bind(slow_path->GetNativeByteOrderLabel());
4652 }
4653
4654 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
4655 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
4656 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
4657 DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
4658
4659 if (release_barrier) {
4660 codegen->GenerateMemoryBarrier(
4661 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
4662 }
4663
4664 // Calculate the pointer to the value.
4665 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4666 vixl32::Register tmp_ptr = temps.Acquire();
4667 __ Add(tmp_ptr, target.object, target.offset);
4668
4669 // Move floating point values to temporaries and prepare output registers.
4670 // Note that float/double CAS uses bitwise comparison, rather than the operator==.
4671 // Reuse the declaring class (if present) and offset temporary for non-reference types,
4672 // the address has already been constructed in the scratch register. We are more careful
4673 // for references due to read and write barrier, see below.
4674 Location old_value;
4675 vixl32::Register store_result;
4676 vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
4677 DataType::Type cas_type = value_type;
4678 if (value_type == DataType::Type::kFloat64) {
4679 vixl32::DRegister expected_vreg = DRegisterFrom(expected);
4680 vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
4681 expected =
4682 LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
4683 new_value =
4684 LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
4685 store_result = RegisterFrom(locations->GetTemp(4));
4686 old_value = return_success
4687 ? LocationFrom(success, store_result)
4688 : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
4689 if (byte_swap) {
4690 __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
4691 __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
4692 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4693 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4694 } else {
4695 __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
4696 __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
4697 }
4698 cas_type = DataType::Type::kInt64;
4699 } else if (value_type == DataType::Type::kFloat32) {
4700 vixl32::SRegister expected_vreg = SRegisterFrom(expected);
4701 vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
4702 expected = locations->GetTemp(0);
4703 new_value = locations->GetTemp(1);
4704 store_result = RegisterFrom(locations->GetTemp(2));
4705 old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
4706 __ Vmov(RegisterFrom(expected), expected_vreg);
4707 __ Vmov(RegisterFrom(new_value), new_value_vreg);
4708 if (byte_swap) {
4709 GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
4710 GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
4711 }
4712 cas_type = DataType::Type::kInt32;
4713 } else if (value_type == DataType::Type::kInt64) {
4714 store_result = RegisterFrom(locations->GetTemp(0));
4715 old_value = return_success
4716 ? LocationFrom(success, store_result)
4717 // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
4718 : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
4719 if (byte_swap) {
4720 // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
4721 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4722 expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
4723 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4724 new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
4725 }
4726 } else {
4727 // Use the last temp. For references with read barriers, this is an extra temporary
4728 // allocated to avoid overwriting the temporaries for declaring class (if present)
4729 // and offset as they are needed in the slow path. Otherwise, this is the offset
4730 // temporary which also works for references without read barriers that need the
4731 // object register preserved for the write barrier.
4732 store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
4733 old_value = return_success ? LocationFrom(store_result) : out;
4734 if (byte_swap) {
4735 DCHECK_EQ(locations->GetTempCount(), 3u);
4736 Location original_expected = expected;
4737 Location original_new_value = new_value;
4738 expected = locations->GetTemp(0);
4739 new_value = locations->GetTemp(1);
4740 GenerateReverseBytes(assembler, value_type, original_expected, expected);
4741 GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
4742 }
4743 }
4744
4745 vixl32::Label exit_loop_label;
4746 vixl32::Label* exit_loop = &exit_loop_label;
4747 vixl32::Label* cmp_failure = &exit_loop_label;
4748
4749 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4750 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
4751 // reloaded old value for subsequent CAS in the slow path.
4752 vixl32::Register old_value_temp = store_result;
4753 ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
4754 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
4755 invoke,
4756 strong,
4757 target.object,
4758 target.offset,
4759 RegisterFrom(expected),
4760 RegisterFrom(new_value),
4761 RegisterFrom(old_value),
4762 old_value_temp,
4763 store_result,
4764 success,
4765 codegen);
4766 codegen->AddSlowPath(rb_slow_path);
4767 exit_loop = rb_slow_path->GetExitLabel();
4768 cmp_failure = rb_slow_path->GetEntryLabel();
4769 }
4770
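// GenerateCompareAndSet() below branches to `cmp_failure` on a value mismatch. With a
// read barrier that lands in the slow path above, which marks the old reference and
// retries the CAS if the mismatch was only a from-space/to-space difference.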
4771 GenerateCompareAndSet(codegen,
4772 cas_type,
4773 strong,
4774 cmp_failure,
4775 /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
4776 tmp_ptr,
4777 expected,
4778 new_value,
4779 old_value,
4780 store_result,
4781 success);
4782 __ Bind(exit_loop);
4783
4784 if (acquire_barrier) {
4785 codegen->GenerateMemoryBarrier(
4786 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
4787 }
4788
4789 if (!return_success) {
4790 if (byte_swap) {
4791 if (value_type == DataType::Type::kInt64) {
4792 GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
4793 // Undo the byte swapping in `expected` and `new_value`; we do not know whether
4794 // the values in these registers will be needed later.
4795 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4796 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4797 } else {
4798 GenerateReverseBytes(assembler, value_type, old_value, out);
4799 }
4800 } else if (value_type == DataType::Type::kFloat64) {
4801 __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
4802 } else if (value_type == DataType::Type::kFloat32) {
4803 __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
4804 }
4805 }
4806
4807 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4808 // Reuse the offset temporary and scratch register for MarkGCCard.
4809 vixl32::Register temp = target.offset;
4810 vixl32::Register card = tmp_ptr;
4811 // Mark card for object assuming new value is stored.
4812 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
4813 codegen->MarkGCCard(temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
4814 }
4815
4816 if (!byte_swap) {
4817 __ Bind(slow_path->GetExitLabel());
4818 }
4819 }
4820
4821 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4822 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4823 }
4824
4825 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4826 GenerateVarHandleCompareAndSetOrExchange(
4827 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4828 }
4829
4830 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4831 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4832 }
4833
4834 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4835 GenerateVarHandleCompareAndSetOrExchange(
4836 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4837 }
4838
4839 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4840 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4841 }
4842
4843 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4844 GenerateVarHandleCompareAndSetOrExchange(
4845 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4846 }
4847
4848 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4849 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4850 }
4851
4852 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4853 GenerateVarHandleCompareAndSetOrExchange(
4854 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4855 }
4856
4857 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4858 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4859 }
4860
4861 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4862 GenerateVarHandleCompareAndSetOrExchange(
4863 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4864 }
4865
4866 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4867 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4868 }
4869
4870 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4871 GenerateVarHandleCompareAndSetOrExchange(
4872 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4873 }
4874
4875 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4876 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4877 }
4878
4879 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4880 GenerateVarHandleCompareAndSetOrExchange(
4881 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4882 }
4883
4884 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4885 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4886 }
4887
4888 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4889 GenerateVarHandleCompareAndSetOrExchange(
4890 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4891 }
4892
4893 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4894 GetAndUpdateOp get_and_update_op) {
4895 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4896 return;
4897 }
4898
4899 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4900 invoke->GetType() == DataType::Type::kReference) {
4901 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4902 // the passed reference and reloads it from the field, thus seeing the new value
4903 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4904 return;
4905 }
4906
4907 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4908
4909 // We can reuse the declaring class (if present) and offset temporary, except for
4910 // non-Baker read barriers that need them for the slow path.
4911 DCHECK_EQ(locations->GetTempCount(),
4912 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4913
4914 DataType::Type value_type = invoke->GetType();
4915 if (get_and_update_op == GetAndUpdateOp::kSet) {
4916 if (DataType::IsFloatingPointType(value_type)) {
4917 // Add temps needed to do the GenerateGetAndUpdate() with core registers.
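      // Temp layout: the offset temp doubles as the exclusive store result; kFloat64 also
      // needs a register pair for the old value and a pair for the argument (5 temps total),
      // kFloat32 needs one register for each (3 temps total).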
      size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
      locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
               value_type == DataType::Type::kReference) {
      // We need to preserve the declaring class (if present) and offset for read barrier
      // slow paths, so we must use a separate temporary for the exclusive store result.
      locations->AddTemp(Location::RequiresRegister());
    } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      DCHECK_EQ(locations->GetTempCount(), 1u);
      locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
    }
  } else {
    // We need temporaries for the new value and exclusive store result.
    size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
    if (get_and_update_op != GetAndUpdateOp::kAdd &&
        GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      if (value_type == DataType::Type::kInt64) {
        // We would ideally add 2 temps for Int64, but that would exhaust the available
        // registers, so we instead reverse the bytes in the actual argument and undo it
        // at the end.
      } else {
        temps_needed += 1u;
      }
    }
    locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    if (DataType::IsFloatingPointType(value_type)) {
      // Note: This shall allocate a D register. There is no way to request an S register.
      locations->AddTemp(Location::RequiresFpuRegister());
    }
  }
}

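// Generate code for the VarHandle getAndSet/getAndAdd/getAndBitwise{And,Or,Xor} families.
// The atomic update itself is emitted by GenerateGetAndUpdate(); this helper resolves the
// target object/offset, inserts the required memory barriers around the update, moves
// floating-point values to core registers and, for the byte array view slow path
// (`byte_swap == true`), byte-reverses the argument and the loaded old value.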
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
                                          CodeGeneratorARMVIXL* codegen,
                                          GetAndUpdateOp get_and_update_op,
                                          std::memory_order order,
                                          bool byte_swap = false) {
  uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location arg = locations->InAt(arg_index);
  Location out = locations->Out();

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARMVIXL* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    slow_path->SetGetAndUpdateOp(get_and_update_op);
    GenerateVarHandleTarget(invoke, target, codegen);
    __ Bind(slow_path->GetNativeByteOrderLabel());
  }

  bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
  bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);

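  // Barrier placement: release and seq_cst orders emit a barrier before the exclusive
  // load/store loop, acquire and seq_cst orders emit one after it. seq_cst uses the full
  // kAnyAny barrier on both sides; relaxed emits no barriers at all.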
  if (release_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
  }

  // Use the scratch register for the pointer to the target location.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register tmp_ptr = temps.Acquire();
  __ Add(tmp_ptr, target.object, target.offset);

  // Use the offset temporary for the exclusive store result.
  vixl32::Register store_result = target.offset;

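  // Exclusive load/store instructions operate on core registers only, so floating-point
  // values are moved with VMOV and handled below as their integer bit patterns.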
  // The load/store type is never floating point.
  DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
      ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
      : value_type;

  // Prepare register for old value and temporaries if any.
  Location old_value = out;
  Location maybe_temp = Location::NoLocation();
  Location maybe_vreg_temp = Location::NoLocation();
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
    // rather than moving between core and FP registers in the loop.
    if (value_type == DataType::Type::kFloat64) {
      vixl32::DRegister arg_vreg = DRegisterFrom(arg);
      DCHECK_EQ(locations->GetTempCount(), 5u);  // `store_result` and the four here.
      old_value =
          LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
      arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
      if (byte_swap) {
        __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
      } else {
        __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
      }
    } else if (value_type == DataType::Type::kFloat32) {
      vixl32::SRegister arg_vreg = SRegisterFrom(arg);
      DCHECK_EQ(locations->GetTempCount(), 3u);  // `store_result` and the two here.
      old_value = locations->GetTemp(1);
      arg = locations->GetTemp(2);
      __ Vmov(RegisterFrom(arg), arg_vreg);
      if (byte_swap) {
        GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
      }
    } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
      if (kUseBakerReadBarrier) {
        // Load the old value initially to a temporary register.
        // We shall move it to `out` later with a read barrier.
        old_value = LocationFrom(store_result);
        store_result = RegisterFrom(out);  // Use the `out` for the exclusive store result.
      } else {
        // The store_result is a separate temporary.
        DCHECK(!store_result.Is(target.object));
        DCHECK(!store_result.Is(target.offset));
      }
    } else if (byte_swap) {
      Location original_arg = arg;
      arg = locations->GetTemp(1);
      if (value_type == DataType::Type::kInt64) {
        arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
      }
      GenerateReverseBytes(assembler, value_type, original_arg, arg);
    }
  } else {
    maybe_temp = DataType::Is64BitType(value_type)
        ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
        : locations->GetTemp(1);
    DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
    if (DataType::IsFloatingPointType(value_type)) {
      maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
      DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
    }
    if (byte_swap) {
      if (get_and_update_op == GetAndUpdateOp::kAdd) {
        // We need to do the byte swapping in the CAS loop for GetAndAdd.
        get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
      } else if (value_type == DataType::Type::kInt64) {
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
        // Due to lack of registers, reverse bytes in `arg` and undo that later.
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
        arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
      } else {
        DCHECK(!DataType::IsFloatingPointType(value_type));
        Location original_arg = arg;
        arg = locations->GetTemp(2);
        DCHECK(!arg.Contains(LocationFrom(store_result)));
        GenerateReverseBytes(assembler, value_type, original_arg, arg);
      }
    }
  }

  GenerateGetAndUpdate(codegen,
                       get_and_update_op,
                       load_store_type,
                       tmp_ptr,
                       arg,
                       old_value,
                       store_result,
                       maybe_temp,
                       maybe_vreg_temp);
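  // The code emitted by GenerateGetAndUpdate() is an exclusive-monitor loop, roughly:
  //   retry: LDREX(D) old_value, [tmp_ptr]
  //          <compute the new value from old_value and `arg`>
  //          STREX(D) store_result, new_value, [tmp_ptr]
  //          CMP store_result, #0; BNE retry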

  if (acquire_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
  }

  if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
    if (value_type == DataType::Type::kInt64) {
      GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
      if (get_and_update_op != GetAndUpdateOp::kSet) {
        // Undo the byte swapping in `arg`. We do not know whether the value in these
        // registers will be needed later.
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
      }
    } else {
      GenerateReverseBytes(assembler, value_type, old_value, out);
    }
  } else if (get_and_update_op == GetAndUpdateOp::kSet &&
             DataType::IsFloatingPointType(value_type)) {
    if (value_type == DataType::Type::kFloat64) {
      __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
    } else {
      __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
    }
  } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    if (kUseBakerReadBarrier) {
      codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
                                                            RegisterFrom(old_value));
    } else {
      codegen->GenerateReadBarrierSlow(
          invoke,
          Location::RegisterLocation(RegisterFrom(out).GetCode()),
          Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
          Location::RegisterLocation(target.object.GetCode()),
          /*offset=*/ 0u,
          /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
    // Reuse the offset temporary and scratch register for MarkGCCard.
    vixl32::Register temp = target.offset;
    vixl32::Register card = tmp_ptr;
    // Mark card for object assuming new value is stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
  }

  if (!byte_swap) {
    __ Bind(slow_path->GetExitLabel());
  }
}

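// The VisitVarHandle* methods below map each access mode to the helpers above. The
// unsuffixed variants (e.g. getAndSet, getAndAdd) are volatile-like and use seq_cst;
// the Acquire/Release suffixes use the corresponding weaker memory order.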
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}

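// Slow path entry for VarHandles created by MethodHandles.byteArrayViewVarHandle().
// It re-checks that the VarHandle is really a ByteArrayViewVarHandle, performs the bounds
// and alignment checks, and then dispatches to the Generate* helpers above, either back on
// the native-byte-order path or with byte_swap=true.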
void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
  CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
  vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
  vixl32::Register varhandle = InputRegisterAt(invoke, 0);
  vixl32::Register object = InputRegisterAt(invoke, 1);
  vixl32::Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    // Use the offset temporary register. It is not used yet at this point.
    vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = temps.Acquire();

    // The main path checked that coordinateType0 is an array class matching the class of
    // the actual coordinate argument, but its component type did not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(ne, GetEntryLabel());

    // Check for array index out of bounds.
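    // The unsigned comparisons below treat a negative `index` as a large positive value, so
    // both a negative index and `index + size > length` branch to `GetEntryLabel()`.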
    __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
    if (!temp.IsLow()) {
      // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
      __ Mov(temp2, size_operand);
    }
    __ Subs(temp, temp, index);
    {
      // Use ExactAssemblyScope here because we are using IT.
      ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                  2 * k16BitT32InstructionSizeInBytes);
      __ it(hs);
      if (temp.IsLow()) {
        __ cmp(hs, temp, size_operand);
      } else {
        __ cmp(hs, temp, temp2);
      }
    }
    __ B(lo, GetEntryLabel());

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
    __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
    __ B(ne, GetEntryLabel());

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
      HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
      if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) {
        // There is no reason to differentiate between native byte order and byte-swap
        // for setting a zero bit pattern. Just return to the main path.
        __ B(GetNativeByteOrderLabel());
        return;
      }
    }
    __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cmp(temp2, 0);
    __ B(ne, GetNativeByteOrderLabel());
  }

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)  // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)    // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke)

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art