1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_arm_vixl.h"
18
19 #include "arch/arm/callee_save_frame_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "art_method.h"
22 #include "code_generator_arm_vixl.h"
23 #include "common_arm.h"
24 #include "heap_poisoning.h"
25 #include "intrinsic_objects.h"
26 #include "intrinsics.h"
27 #include "intrinsics_utils.h"
28 #include "lock_word.h"
29 #include "mirror/array-inl.h"
30 #include "mirror/object_array-inl.h"
31 #include "mirror/reference.h"
32 #include "mirror/string-inl.h"
33 #include "scoped_thread_state_change-inl.h"
34 #include "thread-current-inl.h"
35 #include "well_known_classes.h"
36
37 #include "aarch32/constants-aarch32.h"
38
39 namespace art HIDDEN {
40 namespace arm {
41
42 #define __ assembler->GetVIXLAssembler()->
43
44 using helpers::DRegisterFrom;
45 using helpers::HighRegisterFrom;
46 using helpers::InputDRegisterAt;
47 using helpers::InputRegisterAt;
48 using helpers::InputSRegisterAt;
49 using helpers::Int32ConstantFrom;
50 using helpers::LocationFrom;
51 using helpers::LowRegisterFrom;
52 using helpers::LowSRegisterFrom;
53 using helpers::HighSRegisterFrom;
54 using helpers::OperandFrom;
55 using helpers::OutputDRegister;
56 using helpers::OutputRegister;
57 using helpers::RegisterFrom;
58 using helpers::SRegisterFrom;
59
60 using namespace vixl::aarch32; // NOLINT(build/namespaces)
61
62 using vixl::ExactAssemblyScope;
63 using vixl::CodeBufferCheckScope;
64
65 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
66 return codegen_->GetAssembler();
67 }
68
69 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
70 return codegen_->GetGraph()->GetAllocator();
71 }
72
73 using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
74 SlowPathCodeARMVIXL,
75 ArmVIXLAssembler>;
76
77 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
78 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
79 public:
80 explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
81 : SlowPathCodeARMVIXL(instruction) {
82 }
83
84 void EmitNativeCode(CodeGenerator* codegen) override {
85 DCHECK(codegen->EmitBakerReadBarrier());
86 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
87 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
88 LocationSummary* locations = instruction_->GetLocations();
89 DCHECK(locations->CanCall());
90 DCHECK(instruction_->IsInvokeStaticOrDirect())
91 << "Unexpected instruction in read barrier arraycopy slow path: "
92 << instruction_->DebugName();
93 DCHECK(instruction_->GetLocations()->Intrinsified());
94 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
95
96 DataType::Type type = DataType::Type::kReference;
97 const int32_t element_size = DataType::Size(type);
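// Heap references in ART are 32-bit, so the element size here is 4 bytes.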
98
99 vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
100 vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
101 vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
102 vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
103
104 __ Bind(GetEntryLabel());
105 // The source range and destination pointer were initialized before entering the slow-path.
106 vixl32::Label loop;
107 __ Bind(&loop);
108 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
109 assembler->MaybeUnpoisonHeapReference(tmp);
110 // TODO: Inline the mark bit check before calling the runtime?
111 // tmp = ReadBarrier::Mark(tmp);
112 // No need to save live registers; it's taken care of by the
113 // entrypoint. Also, there is no need to update the stack mask,
114 // as this runtime call will not trigger a garbage collection.
115 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
116 // explanations.)
117 DCHECK(!tmp.IsSP());
118 DCHECK(!tmp.IsLR());
119 DCHECK(!tmp.IsPC());
120 // IP is used internally by the ReadBarrierMarkRegX entry point
121 // as a temporary (and not preserved). It thus cannot be used by
122 // any live register in this slow path.
123 DCHECK(!src_curr_addr.Is(ip));
124 DCHECK(!dst_curr_addr.Is(ip));
125 DCHECK(!src_stop_addr.Is(ip));
126 DCHECK(!tmp.Is(ip));
127 DCHECK(tmp.IsRegister()) << tmp;
128 // TODO: Load the entrypoint once before the loop, instead of
129 // loading it at every iteration.
130 int32_t entry_point_offset =
131 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
132 // This runtime call does not require a stack map.
133 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
134 assembler->MaybePoisonHeapReference(tmp);
135 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
136 __ Cmp(src_curr_addr, src_stop_addr);
137 __ B(ne, &loop, /* is_far_target= */ false);
138 __ B(GetExitLabel());
139 }
140
141 const char* GetDescription() const override {
142 return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
143 }
144
145 private:
146 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
147 };
148
149 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
150 : allocator_(codegen->GetGraph()->GetAllocator()),
151 codegen_(codegen),
152 assembler_(codegen->GetAssembler()),
153 features_(codegen->GetInstructionSetFeatures()) {}
154
155 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
156 Dispatch(invoke);
157 LocationSummary* res = invoke->GetLocations();
158 if (res == nullptr) {
159 return false;
160 }
161 return res->Intrinsified();
162 }
163
164 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
165 LocationSummary* locations =
166 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
167 locations->SetInAt(0, Location::RequiresFpuRegister());
168 locations->SetOut(Location::RequiresRegister());
169 }
170
171 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
172 LocationSummary* locations =
173 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
174 locations->SetInAt(0, Location::RequiresRegister());
175 locations->SetOut(Location::RequiresFpuRegister());
176 }
177
178 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
179 Location input = locations->InAt(0);
180 Location output = locations->Out();
181 if (is64bit) {
182 __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
183 } else {
184 __ Vmov(RegisterFrom(output), SRegisterFrom(input));
185 }
186 }
187
188 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
189 Location input = locations->InAt(0);
190 Location output = locations->Out();
191 if (is64bit) {
192 __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
193 } else {
194 __ Vmov(SRegisterFrom(output), RegisterFrom(input));
195 }
196 }
197
198 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
199 CreateFPToIntLocations(allocator_, invoke);
200 }
201 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
202 CreateIntToFPLocations(allocator_, invoke);
203 }
204
205 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
206 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
207 }
208 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
209 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
210 }
211
212 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
213 CreateFPToIntLocations(allocator_, invoke);
214 }
215 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
216 CreateIntToFPLocations(allocator_, invoke);
217 }
218
219 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
220 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
221 }
222 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
223 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
224 }
225
226 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
227 LocationSummary* locations =
228 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
229 locations->SetInAt(0, Location::RequiresRegister());
230 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
231 }
232
233 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
234 LocationSummary* locations =
235 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
236 locations->SetInAt(0, Location::RequiresRegister());
237 locations->SetInAt(1, Location::RequiresRegister());
238 // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
239 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
240 }
241
242 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
243 LocationSummary* locations =
244 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
245 locations->SetInAt(0, Location::RequiresRegister());
246 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
247 }
248
249 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
250 LocationSummary* locations =
251 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
252 locations->SetInAt(0, Location::RequiresFpuRegister());
253 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
254 }
255
256 static void GenNumberOfLeadingZeros(HInvoke* invoke,
257 DataType::Type type,
258 CodeGeneratorARMVIXL* codegen) {
259 ArmVIXLAssembler* assembler = codegen->GetAssembler();
260 LocationSummary* locations = invoke->GetLocations();
261 Location in = locations->InAt(0);
262 vixl32::Register out = RegisterFrom(locations->Out());
263
264 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
265
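// 64-bit case: if the high word is non-zero, its CLZ is the result; otherwise the result is
// 32 + CLZ(low word).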
266 if (type == DataType::Type::kInt64) {
267 vixl32::Register in_reg_lo = LowRegisterFrom(in);
268 vixl32::Register in_reg_hi = HighRegisterFrom(in);
269 vixl32::Label end;
270 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
271 __ Clz(out, in_reg_hi);
272 __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
273 __ Clz(out, in_reg_lo);
274 __ Add(out, out, 32);
275 if (end.IsReferenced()) {
276 __ Bind(&end);
277 }
278 } else {
279 __ Clz(out, RegisterFrom(in));
280 }
281 }
282
283 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
284 CreateIntToIntLocations(allocator_, invoke);
285 }
286
287 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
288 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
289 }
290
291 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
292 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
293 }
294
295 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
296 GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
297 }
298
299 static void GenNumberOfTrailingZeros(HInvoke* invoke,
300 DataType::Type type,
301 CodeGeneratorARMVIXL* codegen) {
302 DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
303
304 ArmVIXLAssembler* assembler = codegen->GetAssembler();
305 LocationSummary* locations = invoke->GetLocations();
306 vixl32::Register out = RegisterFrom(locations->Out());
307
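// ARM has no CTZ instruction; the trailing-zero count is computed as CLZ(RBIT(x)).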
308 if (type == DataType::Type::kInt64) {
309 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
310 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
311 vixl32::Label end;
312 vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
313 __ Rbit(out, in_reg_lo);
314 __ Clz(out, out);
315 __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
316 __ Rbit(out, in_reg_hi);
317 __ Clz(out, out);
318 __ Add(out, out, 32);
319 if (end.IsReferenced()) {
320 __ Bind(&end);
321 }
322 } else {
323 vixl32::Register in = RegisterFrom(locations->InAt(0));
324 __ Rbit(out, in);
325 __ Clz(out, out);
326 }
327 }
328
329 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
330 CreateIntToIntLocations(allocator_, invoke);
331 }
332
333 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
334 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
335 }
336
337 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
338 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
339 }
340
341 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
342 GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
343 }
344
345 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
346 CreateFPToFPLocations(allocator_, invoke);
347 }
348
349 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
350 ArmVIXLAssembler* assembler = GetAssembler();
351 __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
352 }
353
354 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
355 if (features_.HasARMv8AInstructions()) {
356 CreateFPToFPLocations(allocator_, invoke);
357 }
358 }
359
360 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
361 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
362 ArmVIXLAssembler* assembler = GetAssembler();
363 __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
364 }
365
366 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
367 if (features_.HasARMv8AInstructions()) {
368 LocationSummary* locations =
369 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
370 locations->SetInAt(0, Location::RequiresFpuRegister());
371 locations->SetOut(Location::RequiresRegister());
372 locations->AddTemp(Location::RequiresFpuRegister());
373 }
374 }
375
376 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
377 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
378
379 ArmVIXLAssembler* assembler = GetAssembler();
380 vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
381 vixl32::Register out_reg = OutputRegister(invoke);
382 vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
383 vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
384 vixl32::Label done;
385 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
386
387 // Round to nearest integer, ties away from zero.
388 __ Vcvta(S32, F32, temp1, in_reg);
389 __ Vmov(out_reg, temp1);
390
391 // For positive, zero or NaN inputs, rounding is done.
392 __ Cmp(out_reg, 0);
393 __ B(ge, final_label, /* is_far_target= */ false);
394
395 // Handle input < 0 cases.
396 // If input is negative but not a tie, previous result (round to nearest) is valid.
397 // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
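// Example: for in = -2.5f, Vcvta (ties away from zero) yields -3, but Math.round expects -2
// (round half up), so the tie check below adds 1 to the result.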
398 __ Vrinta(F32, temp1, in_reg);
399 __ Vmov(temp2, 0.5);
400 __ Vsub(F32, temp1, in_reg, temp1);
401 __ Vcmp(F32, temp1, temp2);
402 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
403 {
404 // Use ExactAssemblyScope here because we are using IT.
405 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
406 2 * kMaxInstructionSizeInBytes,
407 CodeBufferCheckScope::kMaximumSize);
408 __ it(eq);
409 __ add(eq, out_reg, out_reg, 1);
410 }
411
412 if (done.IsReferenced()) {
413 __ Bind(&done);
414 }
415 }
416
417 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
418 CreateIntToIntLocations(allocator_, invoke);
419 }
420
421 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
422 ArmVIXLAssembler* assembler = GetAssembler();
423 // Ignore upper 4B of long address.
424 __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
425 }
426
427 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
428 CreateIntToIntLocations(allocator_, invoke);
429 }
430
431 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
432 ArmVIXLAssembler* assembler = GetAssembler();
433 // Ignore upper 4B of long address.
434 __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
435 }
436
437 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
438 CreateIntToIntLocations(allocator_, invoke);
439 }
440
441 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
442 ArmVIXLAssembler* assembler = GetAssembler();
443 // Ignore upper 4B of long address.
444 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
445 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
446 // exception. So we can't use ldrd as addr may be unaligned.
447 vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
448 vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
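// If `addr` aliases the low output register, load the high word first so the address is not
// clobbered before the second load.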
449 if (addr.Is(lo)) {
450 __ Ldr(hi, MemOperand(addr, 4));
451 __ Ldr(lo, MemOperand(addr));
452 } else {
453 __ Ldr(lo, MemOperand(addr));
454 __ Ldr(hi, MemOperand(addr, 4));
455 }
456 }
457
458 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
459 CreateIntToIntLocations(allocator_, invoke);
460 }
461
462 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
463 ArmVIXLAssembler* assembler = GetAssembler();
464 // Ignore upper 4B of long address.
465 __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
466 }
467
468 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
469 LocationSummary* locations =
470 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
471 locations->SetInAt(0, Location::RequiresRegister());
472 locations->SetInAt(1, Location::RequiresRegister());
473 }
474
475 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
476 CreateIntIntToVoidLocations(allocator_, invoke);
477 }
478
479 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
480 ArmVIXLAssembler* assembler = GetAssembler();
481 __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
482 }
483
484 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
485 CreateIntIntToVoidLocations(allocator_, invoke);
486 }
487
488 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
489 ArmVIXLAssembler* assembler = GetAssembler();
490 __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
491 }
492
493 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
494 CreateIntIntToVoidLocations(allocator_, invoke);
495 }
496
497 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
498 ArmVIXLAssembler* assembler = GetAssembler();
499 // Ignore upper 4B of long address.
500 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
501 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
502 // exception. So we can't use strd as addr may be unaligned.
503 __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
504 __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
505 }
506
507 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
508 CreateIntIntToVoidLocations(allocator_, invoke);
509 }
510
511 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
512 ArmVIXLAssembler* assembler = GetAssembler();
513 __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
514 }
515
516 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
517 LocationSummary* locations =
518 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
519 locations->SetOut(Location::RequiresRegister());
520 }
521
522 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
523 ArmVIXLAssembler* assembler = GetAssembler();
524 __ Ldr(OutputRegister(invoke),
525 MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
526 }
527
528 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
529 // The inputs plus temporary registers.
530 LocationSummary* locations =
531 new (allocator_) LocationSummary(invoke,
532 invoke->InputAt(1)->CanBeNull()
533 ? LocationSummary::kCallOnSlowPath
534 : LocationSummary::kNoCall,
535 kIntrinsified);
536 locations->SetInAt(0, Location::RequiresRegister());
537 locations->SetInAt(1, Location::RequiresRegister());
538 locations->AddTemp(Location::RequiresRegister());
539 locations->AddTemp(Location::RequiresRegister());
540 locations->AddTemp(Location::RequiresRegister());
541 // Need an extra temporary register for the String compression feature.
542 if (mirror::kUseStringCompression) {
543 locations->AddTemp(Location::RequiresRegister());
544 }
545 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
546 }
547
548 // Forward declaration.
549 //
550 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
551 // by the compiler for every C++ function, and if this function gets inlined in
552 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
553 // build failure. That is why the NO_INLINE attribute is used.
554 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
555 HInvoke* invoke,
556 vixl32::Label* end,
557 vixl32::Label* different_compression);
558
559 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
560 ArmVIXLAssembler* assembler = GetAssembler();
561 LocationSummary* locations = invoke->GetLocations();
562
563 const vixl32::Register str = InputRegisterAt(invoke, 0);
564 const vixl32::Register arg = InputRegisterAt(invoke, 1);
565 const vixl32::Register out = OutputRegister(invoke);
566
567 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
568 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
569 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
570 vixl32::Register temp3;
571 if (mirror::kUseStringCompression) {
572 temp3 = RegisterFrom(locations->GetTemp(3));
573 }
574
575 vixl32::Label end;
576 vixl32::Label different_compression;
577
578 // Get offsets of count and value fields within a string object.
579 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
580
581 // Note that the null check must have been done earlier.
582 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
583
584 // Take slow path and throw if input can be and is null.
585 SlowPathCodeARMVIXL* slow_path = nullptr;
586 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
587 if (can_slow_path) {
588 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
589 codegen_->AddSlowPath(slow_path);
590 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
591 }
592
593 // Reference equality check, return 0 if same reference.
594 __ Subs(out, str, arg);
595 __ B(eq, &end);
596
597 if (mirror::kUseStringCompression) {
598 // Load `count` fields of this and argument strings.
599 __ Ldr(temp3, MemOperand(str, count_offset));
600 __ Ldr(temp2, MemOperand(arg, count_offset));
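// The low bit of `count` is the compression flag (0 = compressed, 1 = uncompressed); the
// upper 31 bits hold the character count.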
601 // Extract lengths from the `count` fields.
602 __ Lsr(temp0, temp3, 1u);
603 __ Lsr(temp1, temp2, 1u);
604 } else {
605 // Load lengths of this and argument strings.
606 __ Ldr(temp0, MemOperand(str, count_offset));
607 __ Ldr(temp1, MemOperand(arg, count_offset));
608 }
609 // out = length diff.
610 __ Subs(out, temp0, temp1);
611 // temp0 = min(len(str), len(arg)).
612
613 {
614 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
615 2 * kMaxInstructionSizeInBytes,
616 CodeBufferCheckScope::kMaximumSize);
617
618 __ it(gt);
619 __ mov(gt, temp0, temp1);
620 }
621
622 // Shorter string is empty?
623 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
624 // which makes the &end label far away from this branch and thus not 'CBZ-encodable'.
625 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
626
627 if (mirror::kUseStringCompression) {
628 // Check that both strings use the same compression style before using this comparison loop.
629 __ Eors(temp2, temp2, temp3);
630 __ Lsrs(temp2, temp2, 1u);
631 __ B(cs, &different_compression);
632 // For string compression, calculate the number of bytes to compare (not chars).
633 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
634 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
635
636 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
637 2 * kMaxInstructionSizeInBytes,
638 CodeBufferCheckScope::kMaximumSize);
639
640 __ it(ne);
641 __ add(ne, temp0, temp0, temp0);
642 }
643
644
645 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
646
647 __ Bind(&end);
648
649 if (can_slow_path) {
650 __ Bind(slow_path->GetExitLabel());
651 }
652 }
653
654 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
655 HInvoke* invoke,
656 vixl32::Label* end,
657 vixl32::Label* different_compression) {
658 LocationSummary* locations = invoke->GetLocations();
659
660 const vixl32::Register str = InputRegisterAt(invoke, 0);
661 const vixl32::Register arg = InputRegisterAt(invoke, 1);
662 const vixl32::Register out = OutputRegister(invoke);
663
664 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
665 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
666 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
667 vixl32::Register temp3;
668 if (mirror::kUseStringCompression) {
669 temp3 = RegisterFrom(locations->GetTemp(3));
670 }
671
672 vixl32::Label loop;
673 vixl32::Label find_char_diff;
674
675 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
676 // Store offset of string value in preparation for comparison loop.
677 __ Mov(temp1, value_offset);
678
679 // Assertions that must hold in order to compare multiple characters at a time.
680 CHECK_ALIGNED(value_offset, 8);
681 static_assert(IsAligned<8>(kObjectAlignment),
682 "String data must be 8-byte aligned for unrolled CompareTo loop.");
683
684 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
685 DCHECK_EQ(char_size, 2u);
686
687 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
688
689 vixl32::Label find_char_diff_2nd_cmp;
690 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
691 __ Bind(&loop);
692 vixl32::Register temp_reg = temps.Acquire();
693 __ Ldr(temp_reg, MemOperand(str, temp1));
694 __ Ldr(temp2, MemOperand(arg, temp1));
695 __ Cmp(temp_reg, temp2);
696 __ B(ne, &find_char_diff, /* is_far_target= */ false);
697 __ Add(temp1, temp1, char_size * 2);
698
699 __ Ldr(temp_reg, MemOperand(str, temp1));
700 __ Ldr(temp2, MemOperand(arg, temp1));
701 __ Cmp(temp_reg, temp2);
702 __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
703 __ Add(temp1, temp1, char_size * 2);
704 // With string compression, we have compared 8 bytes, otherwise 4 chars.
705 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
706 __ B(hi, &loop, /* is_far_target= */ false);
707 __ B(end);
708
709 __ Bind(&find_char_diff_2nd_cmp);
710 if (mirror::kUseStringCompression) {
711 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
712 __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end?
713 } else {
714 // Without string compression, we can start treating temp0 as signed
715 // and rely on the signed comparison below.
716 __ Sub(temp0, temp0, 2);
717 }
718
719 // Find the single character difference.
720 __ Bind(&find_char_diff);
721 // Get the bit position of the first character that differs.
722 __ Eor(temp1, temp2, temp_reg);
723 __ Rbit(temp1, temp1);
724 __ Clz(temp1, temp1);
725
726 // temp0 = number of characters remaining to compare.
727 // (Without string compression, it could be < 1 if a difference is found by the second CMP
728 // in the comparison loop, and after the end of the shorter string data).
729
730 // Without string compression (temp1 >> 4) = character where difference occurs between the last
731 // two words compared, in the interval [0,1].
732 // (0 for low half-word different, 1 for high half-word different).
733 // With string compression, (temp1 << 3) = byte where the difference occurs,
734 // in the interval [0,3].
735
736 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
737 // the remaining string data, so just return length diff (out).
738 // The comparison is unsigned for string compression, otherwise signed.
739 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
740 __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
741
742 // Extract the characters and calculate the difference.
743 if (mirror::kUseStringCompression) {
744 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
745 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
746 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
747 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
748 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
749 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
750 __ Lsr(temp2, temp2, temp1); // Extract second character.
751 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
752 __ Lsr(out, temp_reg, temp1); // Extract first character.
753 __ And(temp2, temp2, temp3);
754 __ And(out, out, temp3);
755 } else {
756 __ Bic(temp1, temp1, 0xf);
757 __ Lsr(temp2, temp2, temp1);
758 __ Lsr(out, temp_reg, temp1);
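// MOVT with immediate 0 clears the upper half-word, leaving only the 16-bit character.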
759 __ Movt(temp2, 0);
760 __ Movt(out, 0);
761 }
762
763 __ Sub(out, out, temp2);
764 temps.Release(temp_reg);
765
766 if (mirror::kUseStringCompression) {
767 __ B(end);
768 __ Bind(different_compression);
769
770 // Comparison for different compression style.
771 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
772 DCHECK_EQ(c_char_size, 1u);
773
774 // We want to free up the temp3, currently holding `str.count`, for comparison.
775 // So, we move it to the bottom bit of the iteration count `temp0` which we then
776 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
777 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
778 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
779 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
780 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
781 __ Mov(temp1, str);
782 __ Mov(temp2, arg);
783 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
784 {
785 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
786 3 * kMaxInstructionSizeInBytes,
787 CodeBufferCheckScope::kMaximumSize);
788 __ itt(cs); // Interleave with selection of temp1 and temp2.
789 __ mov(cs, temp1, arg); // Preserves flags.
790 __ mov(cs, temp2, str); // Preserves flags.
791 }
792 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
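// SBC subtracts (1 - carry), so the low bit of temp0 now holds the inverted `str`
// compression flag, consumed at the end of this block to flip the result when `arg`
// is compressed.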
793
794 // Adjust temp1 and temp2 from string pointers to data pointers.
795 __ Add(temp1, temp1, value_offset);
796 __ Add(temp2, temp2, value_offset);
797
798 vixl32::Label different_compression_loop;
799 vixl32::Label different_compression_diff;
800
801 // Main loop for different compression.
802 temp_reg = temps.Acquire();
803 __ Bind(&different_compression_loop);
804 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
805 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
806 __ Cmp(temp_reg, temp3);
807 __ B(ne, &different_compression_diff, /* is_far_target= */ false);
808 __ Subs(temp0, temp0, 2);
809 __ B(hi, &different_compression_loop, /* is_far_target= */ false);
810 __ B(end);
811
812 // Calculate the difference.
813 __ Bind(&different_compression_diff);
814 __ Sub(out, temp_reg, temp3);
815 temps.Release(temp_reg);
816 // Flip the difference if the `arg` is compressed.
817 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
818 __ Lsrs(temp0, temp0, 1u);
819 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
820 "Expecting 0=compressed, 1=uncompressed");
821
822 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
823 2 * kMaxInstructionSizeInBytes,
824 CodeBufferCheckScope::kMaximumSize);
825 __ it(cc);
826 __ rsb(cc, out, out, 0);
827 }
828 }
829
830 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
831 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
832 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
833 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
834 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
835 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
836
837 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
838 if (candidate->IsLoadString()) {
839 HLoadString* load_string = candidate->AsLoadString();
840 const DexFile& dex_file = load_string->GetDexFile();
841 return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
842 }
843 return nullptr;
844 }
845
846 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
847 LocationSummary* locations =
848 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
849 InvokeRuntimeCallingConventionARMVIXL calling_convention;
850 locations->SetInAt(0, Location::RequiresRegister());
851 locations->SetInAt(1, Location::RequiresRegister());
852
853 // Temporary registers to store lengths of strings and for calculations.
854 // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
855 locations->AddTemp(LocationFrom(r0));
856
857 // For the generic implementation and for long const strings we need an extra temporary.
858 // We do not need it for short const strings, up to 4 bytes, see code generation below.
859 uint32_t const_string_length = 0u;
860 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
861 if (const_string == nullptr) {
862 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
863 }
864 bool is_compressed =
865 mirror::kUseStringCompression &&
866 const_string != nullptr &&
867 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
868 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
869 locations->AddTemp(Location::RequiresRegister());
870 }
871
872 // TODO: If the String.equals() is used only for an immediately following HIf, we can
873 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
874 // Then we shall need an extra temporary register instead of the output register.
875 locations->SetOut(Location::RequiresRegister());
876 }
877
878 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
879 ArmVIXLAssembler* assembler = GetAssembler();
880 LocationSummary* locations = invoke->GetLocations();
881
882 vixl32::Register str = InputRegisterAt(invoke, 0);
883 vixl32::Register arg = InputRegisterAt(invoke, 1);
884 vixl32::Register out = OutputRegister(invoke);
885
886 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
887
888 vixl32::Label loop;
889 vixl32::Label end;
890 vixl32::Label return_true;
891 vixl32::Label return_false;
892 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
893
894 // Get offsets of count, value, and class fields within a string object.
895 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
896 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
897 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
898
899 // Note that the null check must have been done earlier.
900 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
901
902 StringEqualsOptimizations optimizations(invoke);
903 if (!optimizations.GetArgumentNotNull()) {
904 // Check if input is null, return false if it is.
905 __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
906 }
907
908 // Reference equality check, return true if same reference.
909 __ Cmp(str, arg);
910 __ B(eq, &return_true, /* is_far_target= */ false);
911
912 if (!optimizations.GetArgumentIsString()) {
913 // Instanceof check for the argument by comparing class fields.
914 // All string objects must have the same type since String cannot be subclassed.
915 // Receiver must be a string object, so its class field is equal to all strings' class fields.
916 // If the argument is a string object, its class field must be equal to receiver's class field.
917 //
918 // As the String class is expected to be non-movable, we can read the class
919 // field from String.equals' arguments without read barriers.
920 AssertNonMovableStringClass();
921 // /* HeapReference<Class> */ temp = str->klass_
922 __ Ldr(temp, MemOperand(str, class_offset));
923 // /* HeapReference<Class> */ out = arg->klass_
924 __ Ldr(out, MemOperand(arg, class_offset));
925 // Also, because we use the previously loaded class references only in the
926 // following comparison, we don't need to unpoison them.
927 __ Cmp(temp, out);
928 __ B(ne, &return_false, /* is_far_target= */ false);
929 }
930
931 // Check if one of the inputs is a const string. Do not special-case both strings
932 // being const; such cases should be handled by constant folding if needed.
933 uint32_t const_string_length = 0u;
934 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
935 if (const_string == nullptr) {
936 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
937 if (const_string != nullptr) {
938 std::swap(str, arg); // Make sure the const string is in `str`.
939 }
940 }
941 bool is_compressed =
942 mirror::kUseStringCompression &&
943 const_string != nullptr &&
944 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
945
946 if (const_string != nullptr) {
947 // Load `count` field of the argument string and check if it matches the const string.
948 // This also compares the compression style; if it differs, return false.
949 __ Ldr(temp, MemOperand(arg, count_offset));
950 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
951 __ B(ne, &return_false, /* is_far_target= */ false);
952 } else {
953 // Load `count` fields of this and argument strings.
954 __ Ldr(temp, MemOperand(str, count_offset));
955 __ Ldr(out, MemOperand(arg, count_offset));
956 // Check if `count` fields are equal, return false if they're not.
957 // This also compares the compression style; if it differs, return false.
958 __ Cmp(temp, out);
959 __ B(ne, &return_false, /* is_far_target= */ false);
960 }
961
962 // Assertions that must hold in order to compare strings 4 bytes at a time.
963 // Ok to do this because strings are zero-padded to kObjectAlignment.
964 DCHECK_ALIGNED(value_offset, 4);
965 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
966
967 if (const_string != nullptr &&
968 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
969 : kShortConstStringEqualsCutoffInBytes / 2u)) {
970 // Load and compare the contents. Though we know the contents of the short const string
971 // at compile time, materializing constants may be more code than loading from memory.
972 int32_t offset = value_offset;
973 size_t remaining_bytes =
974 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
975 while (remaining_bytes > sizeof(uint32_t)) {
976 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
977 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
978 vixl32::Register temp2 = scratch_scope.Acquire();
979 __ Ldrd(temp, temp1, MemOperand(str, offset));
980 __ Ldrd(temp2, out, MemOperand(arg, offset));
981 __ Cmp(temp, temp2);
982 __ B(ne, &return_false, /* is_far_target= */ false);
983 __ Cmp(temp1, out);
984 __ B(ne, &return_false, /* is_far_target= */ false);
985 offset += 2u * sizeof(uint32_t);
986 remaining_bytes -= 2u * sizeof(uint32_t);
987 }
988 if (remaining_bytes != 0u) {
989 __ Ldr(temp, MemOperand(str, offset));
990 __ Ldr(out, MemOperand(arg, offset));
991 __ Cmp(temp, out);
992 __ B(ne, &return_false, /* is_far_target= */ false);
993 }
994 } else {
995 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
996 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
997 "Expecting 0=compressed, 1=uncompressed");
998 __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
999
1000 if (mirror::kUseStringCompression) {
1001 // For string compression, calculate the number of bytes to compare (not chars).
1002 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1003 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1004 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1005 2 * kMaxInstructionSizeInBytes,
1006 CodeBufferCheckScope::kMaximumSize);
1007 __ it(cs); // If uncompressed,
1008 __ add(cs, temp, temp, temp); // double the byte count.
1009 }
1010
1011 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1012 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1013 vixl32::Register temp2 = scratch_scope.Acquire();
1014
1015 // Store offset of string value in preparation for comparison loop.
1016 __ Mov(temp1, value_offset);
1017
1018 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1019 __ Bind(&loop);
1020 __ Ldr(out, MemOperand(str, temp1));
1021 __ Ldr(temp2, MemOperand(arg, temp1));
1022 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1023 __ Cmp(out, temp2);
1024 __ B(ne, &return_false, /* is_far_target= */ false);
1025 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1026 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1027 __ B(hi, &loop, /* is_far_target= */ false);
1028 }
1029
1030 // Return true and exit the function.
1031 // If loop does not result in returning false, we return true.
1032 __ Bind(&return_true);
1033 __ Mov(out, 1);
1034 __ B(final_label);
1035
1036 // Return false and exit the function.
1037 __ Bind(&return_false);
1038 __ Mov(out, 0);
1039
1040 if (end.IsReferenced()) {
1041 __ Bind(&end);
1042 }
1043 }
1044
1045 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1046 ArmVIXLAssembler* assembler,
1047 CodeGeneratorARMVIXL* codegen,
1048 bool start_at_zero) {
1049 LocationSummary* locations = invoke->GetLocations();
1050
1051 // Note that the null check must have been done earlier.
1052 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1053
1054 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1055 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1056 SlowPathCodeARMVIXL* slow_path = nullptr;
1057 HInstruction* code_point = invoke->InputAt(1);
1058 if (code_point->IsIntConstant()) {
1059 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1060 std::numeric_limits<uint16_t>::max()) {
1061 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1062 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1063 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1064 codegen->AddSlowPath(slow_path);
1065 __ B(slow_path->GetEntryLabel());
1066 __ Bind(slow_path->GetExitLabel());
1067 return;
1068 }
1069 } else if (code_point->GetType() != DataType::Type::kUint16) {
1070 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1071 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1072 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1073 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1074 codegen->AddSlowPath(slow_path);
1075 __ B(hs, slow_path->GetEntryLabel());
1076 }
1077
1078 if (start_at_zero) {
1079 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1080 DCHECK(tmp_reg.Is(r2));
1081 // Start-index = 0.
1082 __ Mov(tmp_reg, 0);
1083 }
1084
1085 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1086 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1087
1088 if (slow_path != nullptr) {
1089 __ Bind(slow_path->GetExitLabel());
1090 }
1091 }
1092
1093 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1094 LocationSummary* locations = new (allocator_) LocationSummary(
1095 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1096 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1097 // best to align the inputs accordingly.
1098 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1099 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1100 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1101 locations->SetOut(LocationFrom(r0));
1102
1103 // Need to send start-index=0.
1104 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1105 }
1106
1107 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1108 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1109 }
1110
1111 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1112 LocationSummary* locations = new (allocator_) LocationSummary(
1113 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1114 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1115 // best to align the inputs accordingly.
1116 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1117 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1118 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1119 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1120 locations->SetOut(LocationFrom(r0));
1121 }
1122
1123 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1124 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1125 }
1126
1127 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1128 LocationSummary* locations = new (allocator_) LocationSummary(
1129 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1130 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1131 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1132 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1133 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1134 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1135 locations->SetOut(LocationFrom(r0));
1136 }
1137
1138 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1139 ArmVIXLAssembler* assembler = GetAssembler();
1140 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1141 __ Cmp(byte_array, 0);
1142 SlowPathCodeARMVIXL* slow_path =
1143 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1144 codegen_->AddSlowPath(slow_path);
1145 __ B(eq, slow_path->GetEntryLabel());
1146
1147 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1148 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1149 __ Bind(slow_path->GetExitLabel());
1150 }
1151
1152 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1153 LocationSummary* locations =
1154 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1155 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1156 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1157 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1158 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1159 locations->SetOut(LocationFrom(r0));
1160 }
1161
1162 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1163 // No need to emit code checking whether `locations->InAt(2)` is a null
1164 // pointer, as callers of the native method
1165 //
1166 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1167 //
1168 // all include a null check on `data` before calling that method.
1169 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1170 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1171 }
1172
1173 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1174 LocationSummary* locations = new (allocator_) LocationSummary(
1175 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1176 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1177 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1178 locations->SetOut(LocationFrom(r0));
1179 }
1180
1181 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1182 ArmVIXLAssembler* assembler = GetAssembler();
1183 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1184 __ Cmp(string_to_copy, 0);
1185 SlowPathCodeARMVIXL* slow_path =
1186 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1187 codegen_->AddSlowPath(slow_path);
1188 __ B(eq, slow_path->GetEntryLabel());
1189
1190 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1191 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1192
1193 __ Bind(slow_path->GetExitLabel());
1194 }
1195
1196 static void GenArrayAddress(ArmVIXLAssembler* assembler,
1197 vixl32::Register dest,
1198 vixl32::Register base,
1199 Location pos,
1200 DataType::Type type,
1201 int32_t data_offset) {
1202 if (pos.IsConstant()) {
1203 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1204 __ Add(dest, base, static_cast<int32_t>(DataType::Size(type)) * constant + data_offset);
1205 } else {
1206 if (data_offset != 0) {
1207 __ Add(dest, base, data_offset);
1208 base = dest;
1209 }
1210 __ Add(dest, base, Operand(RegisterFrom(pos), LSL, DataType::SizeShift(type)));
1211 }
1212 }
1213
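// Constant positions and lengths that fit an ARM modified immediate (checked via
// ShifterOperandCanAlwaysHold) can stay as constants; everything else needs a register.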
1214 static Location LocationForSystemArrayCopyInput(ArmVIXLAssembler* assembler, HInstruction* input) {
1215 HIntConstant* const_input = input->AsIntConstantOrNull();
1216 if (const_input != nullptr && assembler->ShifterOperandCanAlwaysHold(const_input->GetValue())) {
1217 return Location::ConstantLocation(const_input);
1218 } else {
1219 return Location::RequiresRegister();
1220 }
1221 }
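// This keeps a position or length as a constant only if it can be encoded directly in the
// ADD/CMP/SUB instructions emitted later; otherwise it is materialized in a register. As a rough
// illustration of the modified-immediate restriction relied on here: a value such as 0xff0 (an
// 8-bit pattern shifted) can be held by a shifter operand, whereas an arbitrary value such as
// 0x12345 cannot and would therefore be given a register.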
1222
1223 // We choose to use the native implementation for longer copy lengths.
1224 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1225
VisitSystemArrayCopy(HInvoke * invoke)1226 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1227 // The only read barrier implementation supporting the
1228 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1229 if (codegen_->EmitNonBakerReadBarrier()) {
1230 return;
1231 }
1232
1233 constexpr size_t kInitialNumTemps = 3u; // We need at least three temps.
1234 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1235 invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
1236 if (locations != nullptr) {
1237 locations->SetInAt(1, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(1)));
1238 locations->SetInAt(3, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(3)));
1239 locations->SetInAt(4, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(4)));
1240 if (codegen_->EmitBakerReadBarrier()) {
1241 // Temporary register IP cannot be used in
1242 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1243 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1244 // temporary register from the register allocator.
1245 locations->AddTemp(Location::RequiresRegister());
1246 }
1247 }
1248 }
1249
CheckSystemArrayCopyPosition(ArmVIXLAssembler * assembler,vixl32::Register array,Location pos,Location length,SlowPathCodeARMVIXL * slow_path,vixl32::Register temp,bool length_is_array_length,bool position_sign_checked)1250 static void CheckSystemArrayCopyPosition(ArmVIXLAssembler* assembler,
1251 vixl32::Register array,
1252 Location pos,
1253 Location length,
1254 SlowPathCodeARMVIXL* slow_path,
1255 vixl32::Register temp,
1256 bool length_is_array_length,
1257 bool position_sign_checked) {
1258 // Where is the length in the Array?
1259 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1260
1261 if (pos.IsConstant()) {
1262 int32_t pos_const = Int32ConstantFrom(pos);
1263 if (pos_const == 0) {
1264 if (!length_is_array_length) {
1265 // Check that length(array) >= length.
1266 __ Ldr(temp, MemOperand(array, length_offset));
1267 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1268 __ B(lt, slow_path->GetEntryLabel());
1269 }
1270 } else {
1271 // Calculate length(array) - pos.
1272 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1273 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1274 __ Ldr(temp, MemOperand(array, length_offset));
1275 __ Sub(temp, temp, pos_const);
1276
1277 // Check that (length(array) - pos) >= length.
1278 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1279 __ B(lt, slow_path->GetEntryLabel());
1280 }
1281 } else if (length_is_array_length) {
1282 // The only way the copy can succeed is if pos is zero.
1283 vixl32::Register pos_reg = RegisterFrom(pos);
1284 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1285 } else {
1286 // Check that pos >= 0.
1287 vixl32::Register pos_reg = RegisterFrom(pos);
1288 if (!position_sign_checked) {
1289 __ Cmp(pos_reg, 0);
1290 __ B(lt, slow_path->GetEntryLabel());
1291 }
1292
1293 // Calculate length(array) - pos.
1294 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1295 // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1296 __ Ldr(temp, MemOperand(array, length_offset));
1297 __ Sub(temp, temp, pos_reg);
1298
1299 // Check that (length(array) - pos) >= length.
1300 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1301 __ B(lt, slow_path->GetEntryLabel());
1302 }
1303 }
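// At the Java level, the checks emitted above amount to the following sketch; the branches above
// merely specialize it for constant positions and for the case where `length` is already known
// to be the array length:
//
//   if (pos < 0) goto slow_path;                       // skipped when position_sign_checked
//   if (array.length - pos < length) goto slow_path;   // i.e. pos + length > array.length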
1304
VisitSystemArrayCopy(HInvoke * invoke)1305 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1306 // The only read barrier implementation supporting the
1307 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1308 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1309
1310 ArmVIXLAssembler* assembler = GetAssembler();
1311 LocationSummary* locations = invoke->GetLocations();
1312
1313 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1314 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1315 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1316 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1317 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1318
1319 vixl32::Register src = InputRegisterAt(invoke, 0);
1320 Location src_pos = locations->InAt(1);
1321 vixl32::Register dest = InputRegisterAt(invoke, 2);
1322 Location dest_pos = locations->InAt(3);
1323 Location length = locations->InAt(4);
1324 Location temp1_loc = locations->GetTemp(0);
1325 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1326 Location temp2_loc = locations->GetTemp(1);
1327 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1328 Location temp3_loc = locations->GetTemp(2);
1329 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1330
1331 SlowPathCodeARMVIXL* intrinsic_slow_path =
1332 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1333 codegen_->AddSlowPath(intrinsic_slow_path);
1334
1335 vixl32::Label conditions_on_positions_validated;
1336 SystemArrayCopyOptimizations optimizations(invoke);
1337
1338   // If source and destination are the same array, we go to the slow path when `dest_pos > src_pos`, since
1339   // the forward copy below would overwrite elements not yet copied. Not needed if the positions are the same.
1340 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1341 if (src_pos.IsConstant()) {
1342 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1343 if (dest_pos.IsConstant()) {
1344 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1345 if (optimizations.GetDestinationIsSource()) {
1346 // Checked when building locations.
1347 DCHECK_GE(src_pos_constant, dest_pos_constant);
1348 } else if (src_pos_constant < dest_pos_constant) {
1349 __ Cmp(src, dest);
1350 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1351 }
1352 } else {
1353 if (!optimizations.GetDestinationIsSource()) {
1354 __ Cmp(src, dest);
1355 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1356 }
1357 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1358 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1359 }
1360 } else {
1361 if (!optimizations.GetDestinationIsSource()) {
1362 __ Cmp(src, dest);
1363 __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1364 }
1365 __ Cmp(RegisterFrom(src_pos), OperandFrom(dest_pos, DataType::Type::kInt32));
1366 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1367 }
1368 }
1369
1370 __ Bind(&conditions_on_positions_validated);
1371
1372 if (!optimizations.GetSourceIsNotNull()) {
1373 // Bail out if the source is null.
1374 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1375 }
1376
1377 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1378 // Bail out if the destination is null.
1379 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1380 }
1381
1382 // We have already checked in the LocationsBuilder for the constant case.
1383 if (!length.IsConstant()) {
1384     // Merge the following two comparisons into one unsigned comparison (a negative length, viewed
1385     // as unsigned, is larger than the threshold): if the length is negative, bail out (delegate to
1386     // libcore's native implementation); if the length is >= 128, (currently) prefer the native one.
1387 __ Cmp(RegisterFrom(length), kSystemArrayCopyThreshold);
1388 __ B(hs, intrinsic_slow_path->GetEntryLabel());
1389 }
1390
1391 // Validity checks: source.
1392 CheckSystemArrayCopyPosition(assembler,
1393 src,
1394 src_pos,
1395 length,
1396 intrinsic_slow_path,
1397 temp1,
1398 optimizations.GetCountIsSourceLength(),
1399 /*position_sign_checked=*/ false);
1400
1401 // Validity checks: dest.
1402 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1403 CheckSystemArrayCopyPosition(assembler,
1404 dest,
1405 dest_pos,
1406 length,
1407 intrinsic_slow_path,
1408 temp1,
1409 optimizations.GetCountIsDestinationLength(),
1410 dest_position_sign_checked);
1411
1412 auto check_non_primitive_array_class = [&](vixl32::Register klass, vixl32::Register temp) {
1413 // No read barrier is needed for reading a chain of constant references for comparing
1414 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1415 // /* HeapReference<Class> */ temp = klass->component_type_
1416 __ Ldr(temp, MemOperand(klass, component_offset));
1417 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
1418 // Check that the component type is not null.
1419 __ CompareAndBranchIfZero(temp, intrinsic_slow_path->GetEntryLabel());
1420 // Check that the component type is not a primitive.
1421 // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
1422 __ Ldrh(temp, MemOperand(temp, primitive_offset));
1423 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1424 __ CompareAndBranchIfNonZero(temp, intrinsic_slow_path->GetEntryLabel());
1425 };
1426
1427 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1428 // Check whether all elements of the source array are assignable to the component
1429 // type of the destination array. We do two checks: the classes are the same,
1430 // or the destination is Object[]. If none of these checks succeed, we go to the
1431 // slow path.
1432
1433 if (codegen_->EmitBakerReadBarrier()) {
1434 // /* HeapReference<Class> */ temp1 = dest->klass_
1435 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1436 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1437 // Register `temp1` is not trashed by the read barrier emitted
1438 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1439 // method produces a call to a ReadBarrierMarkRegX entry point,
1440 // which saves all potentially live registers, including
1441       // temporaries such as `temp1`.
1442 // /* HeapReference<Class> */ temp2 = src->klass_
1443 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1444 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1445 } else {
1446 // /* HeapReference<Class> */ temp1 = dest->klass_
1447 __ Ldr(temp1, MemOperand(dest, class_offset));
1448 assembler->MaybeUnpoisonHeapReference(temp1);
1449 // /* HeapReference<Class> */ temp2 = src->klass_
1450 __ Ldr(temp2, MemOperand(src, class_offset));
1451 assembler->MaybeUnpoisonHeapReference(temp2);
1452 }
1453
1454 __ Cmp(temp1, temp2);
1455 if (optimizations.GetDestinationIsTypedObjectArray()) {
1456 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1457 vixl32::Label do_copy;
1458 // For class match, we can skip the source type check regardless of the optimization flag.
1459 __ B(eq, &do_copy, /* is_far_target= */ false);
1460 // No read barrier is needed for reading a chain of constant references
1461 // for comparing with null, see `ReadBarrierOption`.
1462 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1463 __ Ldr(temp1, MemOperand(temp1, component_offset));
1464 assembler->MaybeUnpoisonHeapReference(temp1);
1465 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1466 __ Ldr(temp1, MemOperand(temp1, super_offset));
1467 // No need to unpoison the result, we're comparing against null.
1468 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1469 // Bail out if the source is not a non primitive array.
1470 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1471 check_non_primitive_array_class(temp2, temp2);
1472 }
1473 __ Bind(&do_copy);
1474 } else {
1475 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1476 // For class match, we can skip the array type check completely if at least one of source
1477 // and destination is known to be a non primitive array, otherwise one check is enough.
1478 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1479 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1480 !optimizations.GetSourceIsNonPrimitiveArray()) {
1481 check_non_primitive_array_class(temp2, temp2);
1482 }
1483 }
1484 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1485 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1486 // Bail out if the source is not a non primitive array.
1487 // No read barrier is needed for reading a chain of constant references for comparing
1488 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1489 // /* HeapReference<Class> */ temp2 = src->klass_
1490 __ Ldr(temp2, MemOperand(src, class_offset));
1491 assembler->MaybeUnpoisonHeapReference(temp2);
1492 check_non_primitive_array_class(temp2, temp2);
1493 }
1494
1495 if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1496     // Zero constant length: no need to emit the loop code at all.
1497 } else {
1498 vixl32::Label skip_copy_and_write_barrier;
1499 if (length.IsRegister()) {
1500       // Don't enter the copy loop if the length is zero.
1501 __ CompareAndBranchIfZero(
1502 RegisterFrom(length), &skip_copy_and_write_barrier, /* is_far_target= */ false);
1503 }
1504
1505 const DataType::Type type = DataType::Type::kReference;
1506 const int32_t element_size = DataType::Size(type);
1507 const int32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1508
1509 SlowPathCodeARMVIXL* read_barrier_slow_path = nullptr;
1510 vixl32::Register rb_tmp;
1511 bool emit_rb = codegen_->EmitBakerReadBarrier();
1512 if (emit_rb) {
1513 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1514
1515 // SystemArrayCopy implementation for Baker read barriers (see
1516 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1517 //
1518       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1519 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1520 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1521 // if (is_gray) {
1522 // // Slow-path copy.
1523 // do {
1524 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1525 // } while (src_ptr != end_ptr)
1526 // } else {
1527 // // Fast-path copy.
1528 // do {
1529 // *dest_ptr++ = *src_ptr++;
1530 // } while (src_ptr != end_ptr)
1531 // }
1532
1533 // /* int32_t */ monitor = src->monitor_
1534 rb_tmp = RegisterFrom(locations->GetTemp(3));
1535 __ Ldr(rb_tmp, MemOperand(src, monitor_offset));
1536 // /* LockWord */ lock_word = LockWord(monitor)
1537 static_assert(sizeof(LockWord) == sizeof(int32_t),
1538 "art::LockWord and int32_t have different sizes.");
1539
1540 // Introduce a dependency on the lock_word including the rb_state,
1541 // which shall prevent load-load reordering without using
1542 // a memory barrier (which would be more expensive).
1543       // `src` is unchanged by this operation (the LSR #32 makes the added operand zero), but its
1544       // value now carries an artificial data dependency on `rb_tmp`.
1545 __ Add(src, src, Operand(rb_tmp, vixl32::LSR, 32));
1546
1547 // Slow path used to copy array when `src` is gray.
1548 // Note that the base destination address is computed in `temp2`
1549 // by the slow path code.
1550 read_barrier_slow_path =
1551 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1552 codegen_->AddSlowPath(read_barrier_slow_path);
1553 }
1554
1555 // Compute the base source address in `temp1`.
1556 // Note that for read barrier, `temp1` (the base source address) is computed from `src`
1557 // (and `src_pos`) here, and thus honors the artificial dependency of `src` on `rb_tmp`.
1558 GenArrayAddress(GetAssembler(), temp1, src, src_pos, type, data_offset);
1559 // Compute the base destination address in `temp2`.
1560 GenArrayAddress(GetAssembler(), temp2, dest, dest_pos, type, data_offset);
1561 // Compute the end source address in `temp3`.
1562 GenArrayAddress(GetAssembler(), temp3, temp1, length, type, /*data_offset=*/ 0);
1563
1564 if (emit_rb) {
1565 // Given the numeric representation, it's enough to check the low bit of the
1566 // rb_state. We do that by shifting the bit out of the lock word with LSRS
1567 // which can be a 16-bit instruction unlike the TST immediate.
1568 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1569 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1570 DCHECK(rb_tmp.IsValid());
1571 __ Lsrs(rb_tmp, rb_tmp, LockWord::kReadBarrierStateShift + 1);
1572 // Carry flag is the last bit shifted out by LSRS.
1573 __ B(cs, read_barrier_slow_path->GetEntryLabel());
1574 }
1575
1576 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1577 // poison/unpoison.
1578 vixl32::Label loop;
1579 __ Bind(&loop);
1580 {
1581 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1582 const vixl32::Register temp_reg = temps.Acquire();
1583 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1584 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1585 }
1586 __ Cmp(temp1, temp3);
1587 __ B(ne, &loop, /* is_far_target= */ false);
1588
1589 if (emit_rb) {
1590 DCHECK(read_barrier_slow_path != nullptr);
1591 __ Bind(read_barrier_slow_path->GetExitLabel());
1592 }
1593
1594 // We only need one card marking on the destination array.
1595 codegen_->MarkGCCard(temp1, temp2, dest);
1596
1597 __ Bind(&skip_copy_and_write_barrier);
1598 }
1599
1600 __ Bind(intrinsic_slow_path->GetExitLabel());
1601 }
1602
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1603 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1604 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1605 // the code generator. Furthermore, the register allocator creates fixed live intervals
1606 // for all caller-saved registers because we are doing a function call. As a result, if
1607 // the input and output locations are unallocated, the register allocator runs out of
1608 // registers and fails; however, a debuggable graph is not the common case.
1609 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1610 return;
1611 }
1612
1613 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1614 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1615 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1616
1617 LocationSummary* const locations =
1618 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1619 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1620
1621 locations->SetInAt(0, Location::RequiresFpuRegister());
1622 locations->SetOut(Location::RequiresFpuRegister());
1623 // Native code uses the soft float ABI.
1624 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1625 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1626 }
1627
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1628 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1629 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1630 // the code generator. Furthermore, the register allocator creates fixed live intervals
1631 // for all caller-saved registers because we are doing a function call. As a result, if
1632 // the input and output locations are unallocated, the register allocator runs out of
1633 // registers and fails; however, a debuggable graph is not the common case.
1634 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1635 return;
1636 }
1637
1638 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1639 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1640 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1641 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1642
1643 LocationSummary* const locations =
1644 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1645 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1646
1647 locations->SetInAt(0, Location::RequiresFpuRegister());
1648 locations->SetInAt(1, Location::RequiresFpuRegister());
1649 locations->SetOut(Location::RequiresFpuRegister());
1650 // Native code uses the soft float ABI.
1651 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1652 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1653 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1654 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1655 }
1656
GenFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1657 static void GenFPToFPCall(HInvoke* invoke,
1658 ArmVIXLAssembler* assembler,
1659 CodeGeneratorARMVIXL* codegen,
1660 QuickEntrypointEnum entry) {
1661 LocationSummary* const locations = invoke->GetLocations();
1662
1663 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1664 DCHECK(locations->WillCall() && locations->Intrinsified());
1665
1666 // Native code uses the soft float ABI.
1667 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1668 RegisterFrom(locations->GetTemp(1)),
1669 InputDRegisterAt(invoke, 0));
1670 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1671 __ Vmov(OutputDRegister(invoke),
1672 RegisterFrom(locations->GetTemp(0)),
1673 RegisterFrom(locations->GetTemp(1)));
1674 }
1675
GenFPFPToFPCall(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,QuickEntrypointEnum entry)1676 static void GenFPFPToFPCall(HInvoke* invoke,
1677 ArmVIXLAssembler* assembler,
1678 CodeGeneratorARMVIXL* codegen,
1679 QuickEntrypointEnum entry) {
1680 LocationSummary* const locations = invoke->GetLocations();
1681
1682 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1683 DCHECK(locations->WillCall() && locations->Intrinsified());
1684
1685 // Native code uses the soft float ABI.
1686 __ Vmov(RegisterFrom(locations->GetTemp(0)),
1687 RegisterFrom(locations->GetTemp(1)),
1688 InputDRegisterAt(invoke, 0));
1689 __ Vmov(RegisterFrom(locations->GetTemp(2)),
1690 RegisterFrom(locations->GetTemp(3)),
1691 InputDRegisterAt(invoke, 1));
1692 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1693 __ Vmov(OutputDRegister(invoke),
1694 RegisterFrom(locations->GetTemp(0)),
1695 RegisterFrom(locations->GetTemp(1)));
1696 }
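// With the soft-float runtime calling convention used here (assuming the convention registers
// resolve to r0-r3), the sequence generated for a unary call is essentially:
//
//   vmov r0, r1, d<in>     ; double argument -> core register pair
//   bl   <entrypoint>      ; e.g. kQuickCos; the double result comes back in r0/r1
//   vmov d<out>, r0, r1    ; core register pair -> double result
//
// and the binary variant additionally moves the second argument into r2/r3 before the call.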
1697
VisitMathCos(HInvoke * invoke)1698 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1699 CreateFPToFPCallLocations(allocator_, invoke);
1700 }
1701
VisitMathCos(HInvoke * invoke)1702 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1703 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1704 }
1705
VisitMathSin(HInvoke * invoke)1706 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1707 CreateFPToFPCallLocations(allocator_, invoke);
1708 }
1709
VisitMathSin(HInvoke * invoke)1710 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1711 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1712 }
1713
VisitMathAcos(HInvoke * invoke)1714 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1715 CreateFPToFPCallLocations(allocator_, invoke);
1716 }
1717
VisitMathAcos(HInvoke * invoke)1718 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1719 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1720 }
1721
VisitMathAsin(HInvoke * invoke)1722 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1723 CreateFPToFPCallLocations(allocator_, invoke);
1724 }
1725
VisitMathAsin(HInvoke * invoke)1726 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1727 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1728 }
1729
VisitMathAtan(HInvoke * invoke)1730 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1731 CreateFPToFPCallLocations(allocator_, invoke);
1732 }
1733
VisitMathAtan(HInvoke * invoke)1734 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1735 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1736 }
1737
VisitMathCbrt(HInvoke * invoke)1738 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1739 CreateFPToFPCallLocations(allocator_, invoke);
1740 }
1741
VisitMathCbrt(HInvoke * invoke)1742 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1743 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1744 }
1745
VisitMathCosh(HInvoke * invoke)1746 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1747 CreateFPToFPCallLocations(allocator_, invoke);
1748 }
1749
VisitMathCosh(HInvoke * invoke)1750 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1751 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1752 }
1753
VisitMathExp(HInvoke * invoke)1754 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1755 CreateFPToFPCallLocations(allocator_, invoke);
1756 }
1757
VisitMathExp(HInvoke * invoke)1758 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1759 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1760 }
1761
VisitMathExpm1(HInvoke * invoke)1762 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1763 CreateFPToFPCallLocations(allocator_, invoke);
1764 }
1765
VisitMathExpm1(HInvoke * invoke)1766 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1767 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1768 }
1769
VisitMathLog(HInvoke * invoke)1770 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1771 CreateFPToFPCallLocations(allocator_, invoke);
1772 }
1773
VisitMathLog(HInvoke * invoke)1774 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1775 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1776 }
1777
VisitMathLog10(HInvoke * invoke)1778 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1779 CreateFPToFPCallLocations(allocator_, invoke);
1780 }
1781
VisitMathLog10(HInvoke * invoke)1782 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1783 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1784 }
1785
VisitMathSinh(HInvoke * invoke)1786 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1787 CreateFPToFPCallLocations(allocator_, invoke);
1788 }
1789
VisitMathSinh(HInvoke * invoke)1790 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1791 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1792 }
1793
VisitMathTan(HInvoke * invoke)1794 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1795 CreateFPToFPCallLocations(allocator_, invoke);
1796 }
1797
VisitMathTan(HInvoke * invoke)1798 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1799 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1800 }
1801
VisitMathTanh(HInvoke * invoke)1802 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1803 CreateFPToFPCallLocations(allocator_, invoke);
1804 }
1805
VisitMathTanh(HInvoke * invoke)1806 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1807 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1808 }
1809
VisitMathAtan2(HInvoke * invoke)1810 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1811 CreateFPFPToFPCallLocations(allocator_, invoke);
1812 }
1813
VisitMathAtan2(HInvoke * invoke)1814 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1815 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1816 }
1817
VisitMathPow(HInvoke * invoke)1818 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1819 CreateFPFPToFPCallLocations(allocator_, invoke);
1820 }
1821
VisitMathPow(HInvoke * invoke)1822 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1823 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1824 }
1825
VisitMathHypot(HInvoke * invoke)1826 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1827 CreateFPFPToFPCallLocations(allocator_, invoke);
1828 }
1829
VisitMathHypot(HInvoke * invoke)1830 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1831 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1832 }
1833
VisitMathNextAfter(HInvoke * invoke)1834 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1835 CreateFPFPToFPCallLocations(allocator_, invoke);
1836 }
1837
VisitMathNextAfter(HInvoke * invoke)1838 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1839 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1840 }
1841
VisitIntegerReverse(HInvoke * invoke)1842 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1843 CreateIntToIntLocations(allocator_, invoke);
1844 }
1845
VisitIntegerReverse(HInvoke * invoke)1846 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1847 ArmVIXLAssembler* assembler = GetAssembler();
1848 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1849 }
1850
VisitLongReverse(HInvoke * invoke)1851 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1852 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1853 }
1854
VisitLongReverse(HInvoke * invoke)1855 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1856 ArmVIXLAssembler* assembler = GetAssembler();
1857 LocationSummary* locations = invoke->GetLocations();
1858
1859 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
1860 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
1861 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1862 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1863
1864 __ Rbit(out_reg_lo, in_reg_hi);
1865 __ Rbit(out_reg_hi, in_reg_lo);
1866 }
1867
GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler * assembler,Location pair)1868 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1869 DCHECK(pair.IsRegisterPair());
1870 __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1871 __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
1872 }
1873
GenerateReverseBytes(ArmVIXLAssembler * assembler,DataType::Type type,Location in,Location out)1874 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
1875 DataType::Type type,
1876 Location in,
1877 Location out) {
1878 switch (type) {
1879 case DataType::Type::kUint16:
1880 __ Rev16(RegisterFrom(out), RegisterFrom(in));
1881 break;
1882 case DataType::Type::kInt16:
1883 __ Revsh(RegisterFrom(out), RegisterFrom(in));
1884 break;
1885 case DataType::Type::kInt32:
1886 __ Rev(RegisterFrom(out), RegisterFrom(in));
1887 break;
1888 case DataType::Type::kInt64:
1889 DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
1890 __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
1891 __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
1892 break;
1893 case DataType::Type::kFloat32:
1894 __ Rev(RegisterFrom(in), RegisterFrom(in)); // Note: Clobbers `in`.
1895 __ Vmov(SRegisterFrom(out), RegisterFrom(in));
1896 break;
1897 case DataType::Type::kFloat64:
1898 GenerateReverseBytesInPlaceForEachWord(assembler, in); // Note: Clobbers `in`.
1899 __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in)); // Swap high/low.
1900 break;
1901 default:
1902 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
1903 UNREACHABLE();
1904 }
1905 }
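// A worked example for the kInt64 case: Long.reverseBytes(0x0102030405060708L) must yield
// 0x0807060504030201L. The high input word 0x01020304 is byte-reversed into the low output word
// 0x04030201, and the low input word 0x05060708 into the high output word 0x08070605; that is,
// the two words are swapped and each is byte-reversed, which is exactly what the two REVs above do.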
1906
VisitIntegerReverseBytes(HInvoke * invoke)1907 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1908 CreateIntToIntLocations(allocator_, invoke);
1909 }
1910
VisitIntegerReverseBytes(HInvoke * invoke)1911 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1912 ArmVIXLAssembler* assembler = GetAssembler();
1913 LocationSummary* locations = invoke->GetLocations();
1914 GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
1915 }
1916
VisitLongReverseBytes(HInvoke * invoke)1917 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1918 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1919 }
1920
VisitLongReverseBytes(HInvoke * invoke)1921 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1922 ArmVIXLAssembler* assembler = GetAssembler();
1923 LocationSummary* locations = invoke->GetLocations();
1924 GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
1925 }
1926
VisitShortReverseBytes(HInvoke * invoke)1927 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1928 CreateIntToIntLocations(allocator_, invoke);
1929 }
1930
VisitShortReverseBytes(HInvoke * invoke)1931 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1932 ArmVIXLAssembler* assembler = GetAssembler();
1933 LocationSummary* locations = invoke->GetLocations();
1934 GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
1935 }
1936
GenBitCount(HInvoke * instr,DataType::Type type,ArmVIXLAssembler * assembler)1937 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
1938 DCHECK(DataType::IsIntOrLongType(type)) << type;
1939 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
1940 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
1941
1942 bool is_long = type == DataType::Type::kInt64;
1943 LocationSummary* locations = instr->GetLocations();
1944 Location in = locations->InAt(0);
1945 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
1946 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
1947 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
1948 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
1949 vixl32::Register out_r = OutputRegister(instr);
1950
1951 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
1952   // According to the Cortex-A57/A72 optimization guides, transferring data from a core register to
1953   // the upper or lower half of a VFP D-reg incurs extra latency compared to a full D-reg transfer.
1954   // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
1955 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
1956 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
1957 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
1958 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
1959 if (is_long) {
1960 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
1961 }
1962 __ Vmov(out_r, tmp_s);
1963 }
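// A worked example for the int case with input 0x0000f00f: the value is replicated into both
// halves of the D-register, VCNT.8 produces per-byte counts of (4, 4, 0, 0) within each half,
// the first VPADDL folds adjacent bytes into 16-bit sums (8, 0), the second folds those into a
// 32-bit sum of 8, and the final VMOV returns 8, which is Integer.bitCount(0xf00f).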
1964
VisitIntegerBitCount(HInvoke * invoke)1965 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1966 CreateIntToIntLocations(allocator_, invoke);
1967 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1968 }
1969
VisitIntegerBitCount(HInvoke * invoke)1970 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1971 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
1972 }
1973
VisitLongBitCount(HInvoke * invoke)1974 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1975 VisitIntegerBitCount(invoke);
1976 }
1977
VisitLongBitCount(HInvoke * invoke)1978 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1979 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
1980 }
1981
GenHighestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)1982 static void GenHighestOneBit(HInvoke* invoke,
1983 DataType::Type type,
1984 CodeGeneratorARMVIXL* codegen) {
1985 DCHECK(DataType::IsIntOrLongType(type));
1986
1987 ArmVIXLAssembler* assembler = codegen->GetAssembler();
1988 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1989 const vixl32::Register temp = temps.Acquire();
1990
1991 if (type == DataType::Type::kInt64) {
1992 LocationSummary* locations = invoke->GetLocations();
1993 Location in = locations->InAt(0);
1994 Location out = locations->Out();
1995
1996 vixl32::Register in_reg_lo = LowRegisterFrom(in);
1997 vixl32::Register in_reg_hi = HighRegisterFrom(in);
1998 vixl32::Register out_reg_lo = LowRegisterFrom(out);
1999 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2000
2001 __ Mov(temp, 0x80000000); // Modified immediate.
2002 __ Clz(out_reg_lo, in_reg_lo);
2003 __ Clz(out_reg_hi, in_reg_hi);
2004 __ Lsr(out_reg_lo, temp, out_reg_lo);
2005 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2006
2007 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2008     // Since ARMv8 deprecates IT blocks containing anything more than a single 16-bit
2009     // instruction, we check that the output is in a low register, so that a 16-bit MOV
2010     // encoding can be used inside the IT block. If the output is in a high register, we
2011     // generate 4 more bytes of code instead, to avoid a branch.
2012 Operand mov_src(0);
2013 if (!out_reg_lo.IsLow()) {
2014 __ Mov(LeaveFlags, temp, 0);
2015 mov_src = Operand(temp);
2016 }
2017 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2018 2 * vixl32::k16BitT32InstructionSizeInBytes,
2019 CodeBufferCheckScope::kExactSize);
2020 __ it(ne);
2021 __ mov(ne, out_reg_lo, mov_src);
2022 } else {
2023 vixl32::Register out = OutputRegister(invoke);
2024 vixl32::Register in = InputRegisterAt(invoke, 0);
2025
2026 __ Mov(temp, 0x80000000); // Modified immediate.
2027 __ Clz(out, in);
2028 __ Lsr(out, temp, out);
2029 }
2030 }
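// In other words, for a non-zero 32-bit value x, Integer.highestOneBit(x) == 0x80000000u >> clz(x).
// For example, x == 0x00012345 has clz(x) == 15 and 0x80000000 >> 15 == 0x00010000, its highest set
// bit; for x == 0 the CLZ result of 32 shifts everything out and the result is 0, as required. In
// the 64-bit case, the flags set by the LSRS discard the low result whenever the high input word
// is non-zero.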
2031
VisitIntegerHighestOneBit(HInvoke * invoke)2032 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2033 CreateIntToIntLocations(allocator_, invoke);
2034 }
2035
VisitIntegerHighestOneBit(HInvoke * invoke)2036 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2037 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2038 }
2039
VisitLongHighestOneBit(HInvoke * invoke)2040 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2041 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2042 }
2043
VisitLongHighestOneBit(HInvoke * invoke)2044 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2045 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2046 }
2047
GenLowestOneBit(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)2048 static void GenLowestOneBit(HInvoke* invoke,
2049 DataType::Type type,
2050 CodeGeneratorARMVIXL* codegen) {
2051 DCHECK(DataType::IsIntOrLongType(type));
2052
2053 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2054 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2055 const vixl32::Register temp = temps.Acquire();
2056
2057 if (type == DataType::Type::kInt64) {
2058 LocationSummary* locations = invoke->GetLocations();
2059 Location in = locations->InAt(0);
2060 Location out = locations->Out();
2061
2062 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2063 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2064 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2065 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2066
2067 __ Rsb(out_reg_hi, in_reg_hi, 0);
2068 __ Rsb(out_reg_lo, in_reg_lo, 0);
2069 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2070 // The result of this operation is 0 iff in_reg_lo is 0
2071 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2072
2073 // Discard result for highest 32 bits if lowest 32 bits are not zero.
2074     // Since ARMv8 deprecates IT blocks containing anything more than a single 16-bit
2075     // instruction, we check that the output is in a low register, so that a 16-bit MOV
2076     // encoding can be used inside the IT block. If the output is in a high register, we
2077     // generate 4 more bytes of code instead, to avoid a branch.
2078 Operand mov_src(0);
2079 if (!out_reg_lo.IsLow()) {
2080 __ Mov(LeaveFlags, temp, 0);
2081 mov_src = Operand(temp);
2082 }
2083 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2084 2 * vixl32::k16BitT32InstructionSizeInBytes,
2085 CodeBufferCheckScope::kExactSize);
2086 __ it(ne);
2087 __ mov(ne, out_reg_hi, mov_src);
2088 } else {
2089 vixl32::Register out = OutputRegister(invoke);
2090 vixl32::Register in = InputRegisterAt(invoke, 0);
2091
2092 __ Rsb(temp, in, 0);
2093 __ And(out, temp, in);
2094 }
2095 }
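// This is the classic `x & -x` idiom: two's-complement negation keeps the lowest set bit and
// inverts everything above it, so the AND isolates that bit. For example, x == 44 (0b101100)
// gives -x ending in ...010100, and x & -x == 4 == Integer.lowestOneBit(44). The 64-bit variant
// applies the idiom to each half and uses the flags from the ANDS to discard the high half of
// the result whenever the low half of the input is non-zero.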
2096
VisitIntegerLowestOneBit(HInvoke * invoke)2097 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2098 CreateIntToIntLocations(allocator_, invoke);
2099 }
2100
VisitIntegerLowestOneBit(HInvoke * invoke)2101 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2102 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2103 }
2104
VisitLongLowestOneBit(HInvoke * invoke)2105 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2106 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2107 }
2108
VisitLongLowestOneBit(HInvoke * invoke)2109 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2110 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2111 }
2112
VisitStringGetCharsNoCheck(HInvoke * invoke)2113 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2114 LocationSummary* locations =
2115 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2116 locations->SetInAt(0, Location::RequiresRegister());
2117 locations->SetInAt(1, Location::RequiresRegister());
2118 locations->SetInAt(2, Location::RequiresRegister());
2119 locations->SetInAt(3, Location::RequiresRegister());
2120 locations->SetInAt(4, Location::RequiresRegister());
2121
2122 // Temporary registers to store lengths of strings and for calculations.
2123 locations->AddTemp(Location::RequiresRegister());
2124 locations->AddTemp(Location::RequiresRegister());
2125 locations->AddTemp(Location::RequiresRegister());
2126 }
2127
VisitStringGetCharsNoCheck(HInvoke * invoke)2128 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2129 ArmVIXLAssembler* assembler = GetAssembler();
2130 LocationSummary* locations = invoke->GetLocations();
2131
2132 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2133 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2134 DCHECK_EQ(char_size, 2u);
2135
2136 // Location of data in char array buffer.
2137 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2138
2139 // Location of char array data in string.
2140 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2141
2142 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2143   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2144 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2145 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2146 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2147 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2148 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2149
2150 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2151 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2152 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2153
2154 vixl32::Label done, compressed_string_loop;
2155 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2156 // dst to be copied.
2157 __ Add(dst_ptr, dstObj, data_offset);
2158 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2159
2160 __ Subs(num_chr, srcEnd, srcBegin);
2161 // Early out for valid zero-length retrievals.
2162 __ B(eq, final_label, /* is_far_target= */ false);
2163
2164 // src range to copy.
2165 __ Add(src_ptr, srcObj, value_offset);
2166
2167 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2168 vixl32::Register temp;
2169 vixl32::Label compressed_string_preloop;
2170 if (mirror::kUseStringCompression) {
2171 // Location of count in string.
2172 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2173 temp = temps.Acquire();
2174 // String's length.
2175 __ Ldr(temp, MemOperand(srcObj, count_offset));
2176 __ Tst(temp, 1);
2177 temps.Release(temp);
2178 __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2179 }
2180 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2181
2182 // Do the copy.
2183 vixl32::Label loop, remainder;
2184
2185 temp = temps.Acquire();
2186 // Save repairing the value of num_chr on the < 4 character path.
2187 __ Subs(temp, num_chr, 4);
2188 __ B(lt, &remainder, /* is_far_target= */ false);
2189
2190 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2191 __ Mov(num_chr, temp);
2192
2193 // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
2194 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2195 // to rectify these everywhere this intrinsic applies.)
2196 __ Bind(&loop);
2197 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2198 __ Subs(num_chr, num_chr, 4);
2199 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2200 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2201 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2202 temps.Release(temp);
2203 __ B(ge, &loop, /* is_far_target= */ false);
2204
2205 __ Adds(num_chr, num_chr, 4);
2206 __ B(eq, final_label, /* is_far_target= */ false);
2207
2208 // Main loop for < 4 character case and remainder handling. Loads and stores one
2209 // 16-bit Java character at a time.
2210 __ Bind(&remainder);
2211 temp = temps.Acquire();
2212 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2213 __ Subs(num_chr, num_chr, 1);
2214 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2215 temps.Release(temp);
2216 __ B(gt, &remainder, /* is_far_target= */ false);
2217
2218 if (mirror::kUseStringCompression) {
2219 __ B(final_label);
2220
2221 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2222 DCHECK_EQ(c_char_size, 1u);
2223 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2224 __ Bind(&compressed_string_preloop);
2225 __ Add(src_ptr, src_ptr, srcBegin);
2226 __ Bind(&compressed_string_loop);
2227 temp = temps.Acquire();
2228 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2229 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2230 temps.Release(temp);
2231 __ Subs(num_chr, num_chr, 1);
2232 __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2233 }
2234
2235 if (done.IsReferenced()) {
2236 __ Bind(&done);
2237 }
2238 }
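// Overall, with n = srcEnd - srcBegin, the code above implements roughly:
//
//   if (uncompressed source) {
//     while (n >= 4) { copy 4 chars using two 32-bit LDR/STR pairs; n -= 4; }
//     while (n > 0)  { copy 1 char using LDRH/STRH; n -= 1; }
//   } else {  // compressed (Latin-1) source string
//     while (n > 0)  { widen 1 byte to 1 char using LDRB/STRH; n -= 1; }
//   }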
2239
VisitFloatIsInfinite(HInvoke * invoke)2240 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2241 CreateFPToIntLocations(allocator_, invoke);
2242 }
2243
VisitFloatIsInfinite(HInvoke * invoke)2244 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2245 ArmVIXLAssembler* const assembler = GetAssembler();
2246 const vixl32::Register out = OutputRegister(invoke);
2247 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2248 // we don't care about the sign bit anyway.
2249 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2250
2251 __ Vmov(out, InputSRegisterAt(invoke, 0));
2252 // We don't care about the sign bit, so shift left.
2253 __ Lsl(out, out, 1);
2254 __ Eor(out, out, infinity);
2255 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2256 }
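// A bit-level sketch of the test above: Float.floatToRawIntBits(+inf) is 0x7f800000 and the -inf
// bit pattern is 0xff800000. After the left shift by one, both become 0xff000000, which is also
// the value of `infinity`, so the EOR yields zero exactly for +/- infinity (NaNs keep a non-zero
// mantissa and do not match), and GenerateConditionWithZero() turns that into the boolean result.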
2257
VisitDoubleIsInfinite(HInvoke * invoke)2258 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2259 CreateFPToIntLocations(allocator_, invoke);
2260 }
2261
VisitDoubleIsInfinite(HInvoke * invoke)2262 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2263 ArmVIXLAssembler* const assembler = GetAssembler();
2264 const vixl32::Register out = OutputRegister(invoke);
2265 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2266 const vixl32::Register temp = temps.Acquire();
2267 // The highest 32 bits of double precision positive infinity separated into
2268 // two constants encodable as immediate operands.
2269 constexpr uint32_t infinity_high = 0x7f000000U;
2270 constexpr uint32_t infinity_high2 = 0x00f00000U;
2271
2272 static_assert((infinity_high | infinity_high2) ==
2273 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2274 "The constants do not add up to the high 32 bits of double "
2275 "precision positive infinity.");
2276 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2277 __ Eor(out, out, infinity_high);
2278 __ Eor(out, out, infinity_high2);
2279 // We don't care about the sign bit, so shift left.
2280 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2281 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2282 }
2283
VisitMathCeil(HInvoke * invoke)2284 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2285 if (features_.HasARMv8AInstructions()) {
2286 CreateFPToFPLocations(allocator_, invoke);
2287 }
2288 }
2289
VisitMathCeil(HInvoke * invoke)2290 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2291 ArmVIXLAssembler* assembler = GetAssembler();
2292 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2293 __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2294 }
2295
VisitMathFloor(HInvoke * invoke)2296 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2297 if (features_.HasARMv8AInstructions()) {
2298 CreateFPToFPLocations(allocator_, invoke);
2299 }
2300 }
2301
VisitMathFloor(HInvoke * invoke)2302 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2303 ArmVIXLAssembler* assembler = GetAssembler();
2304 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2305 __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2306 }
2307
2308 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
2309 void IntrinsicLocationsBuilderARMVIXL::Visit##name##ValueOf(HInvoke* invoke) { \
2310 InvokeRuntimeCallingConventionARMVIXL calling_convention; \
2311 IntrinsicVisitor::ComputeValueOfLocations(invoke, \
2312 codegen_, \
2313 low, \
2314 (high) - (low) + 1, \
2315 LocationFrom(r0), \
2316 LocationFrom(calling_convention.GetRegisterAt(0))); \
2317 } \
2318 void IntrinsicCodeGeneratorARMVIXL::Visit##name##ValueOf(HInvoke* invoke) { \
2319 IntrinsicVisitor::ValueOfInfo info = \
2320 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
2321 codegen_->GetCompilerOptions(), \
2322 WellKnownClasses::java_lang_##name##_value, \
2323 low, \
2324 (high) - (low) + 1, \
2325 start_index); \
2326 HandleValueOf(invoke, info, type); \
2327 }
BOXED_TYPES(VISIT_INTRINSIC)2328 BOXED_TYPES(VISIT_INTRINSIC)
2329 #undef VISIT_INTRINSIC
2330
2331
2332 void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke,
2333 const IntrinsicVisitor::ValueOfInfo& info,
2334 DataType::Type type) {
2335 LocationSummary* locations = invoke->GetLocations();
2336 ArmVIXLAssembler* const assembler = GetAssembler();
2337
2338 vixl32::Register out = RegisterFrom(locations->Out());
2339 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2340 vixl32::Register temp = temps.Acquire();
2341 auto allocate_instance = [&]() {
2342 DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2343 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2344 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2345 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2346 };
2347 if (invoke->InputAt(0)->IsIntConstant()) {
2348 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2349 if (static_cast<uint32_t>(value - info.low) < info.length) {
2350 // Just embed the object in the code.
2351 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
2352 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2353 } else {
2354 DCHECK(locations->CanCall());
2355 // Allocate and initialize a new object.
2356 // TODO: If we JIT, we could allocate the object now, and store it in the
2357 // JIT object table.
2358 allocate_instance();
2359 __ Mov(temp, value);
2360 assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset);
2361 // Class pointer and `value` final field stores require a barrier before publication.
2362 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2363 }
2364 } else {
2365 DCHECK(locations->CanCall());
2366 vixl32::Register in = RegisterFrom(locations->InAt(0));
2367 // Check bounds of our cache.
2368 __ Add(out, in, -info.low);
2369 __ Cmp(out, info.length);
2370 vixl32::Label allocate, done;
2371 __ B(hs, &allocate, /* is_far_target= */ false);
2372 // If the value is within the bounds, load the object directly from the array.
2373 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2374 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2375 assembler->MaybeUnpoisonHeapReference(out);
2376 __ B(&done);
2377 __ Bind(&allocate);
2378 // Otherwise allocate and initialize a new object.
2379 allocate_instance();
2380 assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset);
2381 // Class pointer and `value` final field stores require a barrier before publication.
2382 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2383 __ Bind(&done);
2384 }
2385 }
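// For reference, the code above mirrors the Java-level boxing cache, sketched here for
// Integer.valueOf(); the other boxed types handled by BOXED_TYPES are analogous:
//
//   if ((unsigned)(value - low) < length) {
//     return cache[value - low];            // object loaded from the boot image array
//   } else {
//     Integer boxed = allocate_instance();  // kQuickAllocObjectInitialized
//     boxed.value = value;                  // followed by a StoreStore barrier before publication
//     return boxed;
//   }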
2386
VisitReferenceGetReferent(HInvoke * invoke)2387 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2388 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2389 }
2390
VisitReferenceGetReferent(HInvoke * invoke)2391 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2392 ArmVIXLAssembler* assembler = GetAssembler();
2393 LocationSummary* locations = invoke->GetLocations();
2394
2395 Location obj = locations->InAt(0);
2396 Location out = locations->Out();
2397
2398 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2399 codegen_->AddSlowPath(slow_path);
2400
2401 if (codegen_->EmitReadBarrier()) {
2402 // Check self->GetWeakRefAccessEnabled().
2403 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2404 vixl32::Register temp = temps.Acquire();
2405 __ Ldr(temp,
2406 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2407 __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled));
2408 __ B(ne, slow_path->GetEntryLabel());
2409 }
2410
2411 {
2412 // Load the java.lang.ref.Reference class.
2413 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2414 vixl32::Register temp = temps.Acquire();
2415 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2416
2417 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2418 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2419 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2420 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2421 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2422 __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2423 __ Cmp(temp, 0);
2424 __ B(ne, slow_path->GetEntryLabel());
2425 }
2426
2427 // Load the value from the field.
2428 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2429 if (codegen_->EmitBakerReadBarrier()) {
2430 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2431 out,
2432 RegisterFrom(obj),
2433 referent_offset,
2434 /*maybe_temp=*/ Location::NoLocation(),
2435 /*needs_null_check=*/ true);
2436 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2437 } else {
2438 {
2439 vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2440 __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2441 codegen_->MaybeRecordImplicitNullCheck(invoke);
2442 }
2443 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2444 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2445 }
2446 __ Bind(slow_path->GetExitLabel());
2447 }
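// In outline, the fast path generated above is:
//
//   if (emitting read barriers && self->weak_ref_access_enabled != kVisiblyEnabled) goto slow_path;
//   if (Reference.disableIntrinsic || Reference.slowPathEnabled) goto slow_path;  // one LDRH covers both
//   return obj.referent;  // volatile load, with a Baker read barrier when required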
2448
VisitReferenceRefersTo(HInvoke * invoke)2449 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2450 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
2451 }
2452
VisitReferenceRefersTo(HInvoke * invoke)2453 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2454 LocationSummary* locations = invoke->GetLocations();
2455 ArmVIXLAssembler* assembler = GetAssembler();
2456 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2457
2458 vixl32::Register obj = RegisterFrom(locations->InAt(0));
2459 vixl32::Register other = RegisterFrom(locations->InAt(1));
2460 vixl32::Register out = RegisterFrom(locations->Out());
2461 vixl32::Register tmp = temps.Acquire();
2462
2463 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2464 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2465
2466 {
2467 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2468 // Loading scratch register always uses 32-bit encoding.
2469 vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2470 vixl32::k32BitT32InstructionSizeInBytes);
2471 __ ldr(tmp, MemOperand(obj, referent_offset));
2472 codegen_->MaybeRecordImplicitNullCheck(invoke);
2473 }
2474 assembler->MaybeUnpoisonHeapReference(tmp);
2475 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
2476
2477 if (codegen_->EmitReadBarrier()) {
2478 DCHECK(kUseBakerReadBarrier);
2479
2480 vixl32::Label calculate_result;
2481 __ Subs(out, tmp, other);
2482 __ B(eq, &calculate_result); // `out` is 0 if taken.
2483
2484 // Check if the loaded reference is null.
2485 __ Cmp(tmp, 0);
2486 __ B(eq, &calculate_result); // `out` is not 0 if taken.
2487
2488 // For correct memory visibility, we need a barrier before loading the lock word, but
2489 // the barrier already emitted for the volatile load above is sufficient.
2490
2491 // Load the lockword and check if it is a forwarding address.
2492 static_assert(LockWord::kStateShift == 30u);
2493 static_assert(LockWord::kStateForwardingAddress == 3u);
2494 __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2495 __ Cmp(tmp, Operand(0xc0000000));
2496 __ B(lo, &calculate_result); // `out` is not 0 if taken.
2497
2498 // Extract the forwarding address and subtract from `other`.
2499 __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2500
2501 __ Bind(&calculate_result);
2502 } else {
2503 DCHECK(!codegen_->EmitReadBarrier());
2504 __ Sub(out, tmp, other);
2505 }
2506
2507 // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
2508 __ Clz(out, out);
2509 __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
2510 }
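// The arithmetic above implements the following comparison for Reference.refersTo() (informal
// sketch; with the concurrent copying collector a from-space copy is matched through its
// forwarding address):
//
//   tmp = obj->referent;                      // Volatile load + kLoadAny barrier.
//   if (tmp == other) return true;
//   if (emit_read_barriers && tmp != null) {
//     lock_word = tmp->monitor_;
//     if (is_forwarding_address(lock_word) &&
//         (lock_word << kForwardingAddressShift) == other) {
//       return true;
//     }
//   }
//   return false;                             // Boolean computed via CLZ/LSR on `out`.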
2511
VisitThreadInterrupted(HInvoke * invoke)2512 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2513 LocationSummary* locations =
2514 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2515 locations->SetOut(Location::RequiresRegister());
2516 }
2517
VisitThreadInterrupted(HInvoke * invoke)2518 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2519 ArmVIXLAssembler* assembler = GetAssembler();
2520 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2521 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2522 __ Ldr(out, MemOperand(tr, offset));
2523 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2524 vixl32::Register temp = temps.Acquire();
2525 vixl32::Label done;
2526 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2527 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
2528 __ Dmb(vixl32::ISH);
2529 __ Mov(temp, 0);
2530 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2531 __ Dmb(vixl32::ISH);
2532 if (done.IsReferenced()) {
2533 __ Bind(&done);
2534 }
2535 }
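// Roughly equivalent logic for Thread.interrupted() as emitted above (sketch only):
//
//   out = self->interrupted_;   // 32-bit flag at Thread::InterruptedOffset().
//   if (out != 0) {
//     dmb ish;
//     self->interrupted_ = 0;   // Clear the flag only if it was set.
//     dmb ish;
//   }
//   return out;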
2536
VisitReachabilityFence(HInvoke * invoke)2537 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2538 LocationSummary* locations =
2539 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2540 locations->SetInAt(0, Location::Any());
2541 }
2542
VisitReachabilityFence(HInvoke * invoke)2543 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
2544
VisitIntegerDivideUnsigned(HInvoke * invoke)2545 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2546 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2547 }
2548
VisitIntegerDivideUnsigned(HInvoke * invoke)2549 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2550 ArmVIXLAssembler* assembler = GetAssembler();
2551 LocationSummary* locations = invoke->GetLocations();
2552 vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2553 vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2554 vixl32::Register out = RegisterFrom(locations->Out());
2555
2556 // If the divisor is zero, bail out to the managed implementation to handle it.
2557 SlowPathCodeARMVIXL* slow_path =
2558 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2559 codegen_->AddSlowPath(slow_path);
2560 __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2561
2562 __ Udiv(out, dividend, divisor);
2563
2564 __ Bind(slow_path->GetExitLabel());
2565 }
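// Integer.divideUnsigned() maps to a single UDIV plus a zero check (sketch; division by zero
// is left to the managed implementation, which throws ArithmeticException):
//
//   if (divisor == 0) goto slow_path;
//   out = (uint32_t)dividend / (uint32_t)divisor;   // udiv out, dividend, divisor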
2566
Use64BitExclusiveLoadStore(bool atomic,CodeGeneratorARMVIXL * codegen)2567 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2568 return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2569 }
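// Note: when the CPU does not report the "atomic LDRD/STRD" feature, a plain LDRD/STRD is not
// guaranteed to be single-copy atomic, so atomic 64-bit accesses below are implemented with a
// LDREXD/STREXD loop even for plain volatile loads and stores.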
2570
GenerateIntrinsicGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location out,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2571 static void GenerateIntrinsicGet(HInvoke* invoke,
2572 CodeGeneratorARMVIXL* codegen,
2573 DataType::Type type,
2574 std::memory_order order,
2575 bool atomic,
2576 vixl32::Register base,
2577 vixl32::Register offset,
2578 Location out,
2579 Location maybe_temp,
2580 Location maybe_temp2,
2581 Location maybe_temp3) {
2582 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2583 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2584 DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2585 DCHECK(atomic || order == std::memory_order_relaxed);
2586
2587 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2588 MemOperand address(base, offset);
2589 switch (type) {
2590 case DataType::Type::kBool:
2591 __ Ldrb(RegisterFrom(out), address);
2592 break;
2593 case DataType::Type::kInt8:
2594 __ Ldrsb(RegisterFrom(out), address);
2595 break;
2596 case DataType::Type::kUint16:
2597 __ Ldrh(RegisterFrom(out), address);
2598 break;
2599 case DataType::Type::kInt16:
2600 __ Ldrsh(RegisterFrom(out), address);
2601 break;
2602 case DataType::Type::kInt32:
2603 __ Ldr(RegisterFrom(out), address);
2604 break;
2605 case DataType::Type::kInt64:
2606 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2607 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2608 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2609 const vixl32::Register temp_reg = temps.Acquire();
2610 __ Add(temp_reg, base, offset);
2611 vixl32::Label loop;
2612 __ Bind(&loop);
2613 __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2614 __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2615 __ Cmp(strexd_tmp, 0);
2616 __ B(ne, &loop);
2617 } else {
2618 __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2619 }
2620 break;
2621 case DataType::Type::kReference:
2622 if (codegen->EmitBakerReadBarrier()) {
2623 // Piggy-back on the field load path using introspection for the Baker read barrier.
2624 vixl32::Register temp = RegisterFrom(maybe_temp);
2625 __ Add(temp, base, offset);
2626 codegen->GenerateFieldLoadWithBakerReadBarrier(
2627 invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2628 } else {
2629 __ Ldr(RegisterFrom(out), address);
2630 }
2631 break;
2632 case DataType::Type::kFloat32: {
2633 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2634 const vixl32::Register temp_reg = temps.Acquire();
2635 __ Add(temp_reg, base, offset);
2636 __ Vldr(SRegisterFrom(out), MemOperand(temp_reg));
2637 break;
2638 }
2639 case DataType::Type::kFloat64: {
2640 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2641 const vixl32::Register temp_reg = temps.Acquire();
2642 __ Add(temp_reg, base, offset);
2643 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2644 vixl32::Register lo = RegisterFrom(maybe_temp);
2645 vixl32::Register hi = RegisterFrom(maybe_temp2);
2646 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2647 vixl32::Label loop;
2648 __ Bind(&loop);
2649 __ Ldrexd(lo, hi, MemOperand(temp_reg));
2650 __ Strexd(strexd_tmp, lo, hi, MemOperand(temp_reg));
2651 __ Cmp(strexd_tmp, 0);
2652 __ B(ne, &loop);
2653 __ Vmov(DRegisterFrom(out), lo, hi);
2654 } else {
2655 __ Vldr(DRegisterFrom(out), MemOperand(temp_reg));
2656 }
2657 break;
2658 }
2659 default:
2660 LOG(FATAL) << "Unexpected type " << type;
2661 UNREACHABLE();
2662 }
2663 if (acquire_barrier) {
2664 codegen->GenerateMemoryBarrier(
2665 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2666 }
2667 if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) {
2668 Location base_loc = LocationFrom(base);
2669 Location index_loc = LocationFrom(offset);
2670 codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2671 }
2672 }
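// Barrier placement used by GenerateIntrinsicGet(), summarized (all barriers are emitted via
// GenerateMemoryBarrier()):
//
//   memory_order_relaxed:  load
//   memory_order_acquire:  load; barrier (kLoadAny)
//   memory_order_seq_cst:  load; barrier (kAnyAny)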
2673
CreateUnsafeGetLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)2674 static void CreateUnsafeGetLocations(HInvoke* invoke,
2675 CodeGeneratorARMVIXL* codegen,
2676 DataType::Type type,
2677 bool atomic) {
2678 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2679 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2680 LocationSummary* locations =
2681 new (allocator) LocationSummary(invoke,
2682 can_call
2683 ? LocationSummary::kCallOnSlowPath
2684 : LocationSummary::kNoCall,
2685 kIntrinsified);
2686 if (can_call && kUseBakerReadBarrier) {
2687 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2688 }
2689 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2690 locations->SetInAt(1, Location::RequiresRegister());
2691 locations->SetInAt(2, Location::RequiresRegister());
2692 locations->SetOut(Location::RequiresRegister(),
2693 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2694 if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2695 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2696 // We need a temporary register for the read barrier marking slow
2697 // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2698 // or the STREXD result for LDREXD/STREXD sequence when LDRD is non-atomic.
2699 locations->AddTemp(Location::RequiresRegister());
2700 }
2701 }
2702
GenUnsafeGet(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic)2703 static void GenUnsafeGet(HInvoke* invoke,
2704 CodeGeneratorARMVIXL* codegen,
2705 DataType::Type type,
2706 std::memory_order order,
2707 bool atomic) {
2708 LocationSummary* locations = invoke->GetLocations();
2709 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
2710 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
2711 Location out = locations->Out();
2712 Location maybe_temp = Location::NoLocation();
2713 if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2714 (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2715 maybe_temp = locations->GetTemp(0);
2716 }
2717 GenerateIntrinsicGet(invoke,
2718 codegen,
2719 type,
2720 order,
2721 atomic,
2722 base,
2723 offset,
2724 out,
2725 maybe_temp,
2726 /*maybe_temp2=*/ Location::NoLocation(),
2727 /*maybe_temp3=*/ Location::NoLocation());
2728 }
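// For example, Unsafe.getIntVolatile(obj, offset) produces roughly the following (sketch):
//
//   ldr out, [base, offset]   // base = obj, offset = low 32 bits of the long offset.
//   dmb ish                   // kAnyAny for seq_cst.
//
// while the plain Unsafe.getInt(obj, offset) is just the LDR without the barrier.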
2729
VisitUnsafeGet(HInvoke * invoke)2730 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2731 VisitJdkUnsafeGet(invoke);
2732 }
2733
VisitUnsafeGet(HInvoke * invoke)2734 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2735 VisitJdkUnsafeGet(invoke);
2736 }
2737
VisitUnsafeGetVolatile(HInvoke * invoke)2738 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2739 VisitJdkUnsafeGetVolatile(invoke);
2740 }
2741
VisitUnsafeGetVolatile(HInvoke * invoke)2742 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2743 VisitJdkUnsafeGetVolatile(invoke);
2744 }
2745
VisitUnsafeGetLong(HInvoke * invoke)2746 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2747 VisitJdkUnsafeGetLong(invoke);
2748 }
2749
VisitUnsafeGetLong(HInvoke * invoke)2750 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2751 VisitJdkUnsafeGetLong(invoke);
2752 }
2753
VisitUnsafeGetLongVolatile(HInvoke * invoke)2754 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2755 VisitJdkUnsafeGetLongVolatile(invoke);
2756 }
2757
VisitUnsafeGetLongVolatile(HInvoke * invoke)2758 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2759 VisitJdkUnsafeGetLongVolatile(invoke);
2760 }
2761
VisitUnsafeGetObject(HInvoke * invoke)2762 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2763 VisitJdkUnsafeGetReference(invoke);
2764 }
2765
VisitUnsafeGetObject(HInvoke * invoke)2766 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2767 VisitJdkUnsafeGetReference(invoke);
2768 }
2769
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2770 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2771 VisitJdkUnsafeGetReferenceVolatile(invoke);
2772 }
2773
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2774 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2775 VisitJdkUnsafeGetReferenceVolatile(invoke);
2776 }
2777
VisitUnsafeGetByte(HInvoke * invoke)2778 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2779 VisitJdkUnsafeGetByte(invoke);
2780 }
2781
VisitUnsafeGetByte(HInvoke * invoke)2782 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2783 VisitJdkUnsafeGetByte(invoke);
2784 }
2785
VisitJdkUnsafeGet(HInvoke * invoke)2786 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2787 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2788 }
2789
VisitJdkUnsafeGet(HInvoke * invoke)2790 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2791 GenUnsafeGet(
2792 invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2793 }
2794
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2795 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2796 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2797 }
2798
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2799 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2800 GenUnsafeGet(
2801 invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2802 }
2803
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2804 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2805 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2806 }
2807
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2808 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2809 GenUnsafeGet(
2810 invoke, codegen_, DataType::Type::kInt32, std::memory_order_acquire, /*atomic=*/ true);
2811 }
2812
VisitJdkUnsafeGetLong(HInvoke * invoke)2813 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2814 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2815 }
2816
VisitJdkUnsafeGetLong(HInvoke * invoke)2817 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2818 GenUnsafeGet(
2819 invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2820 }
2821
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2822 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2823 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2824 }
2825
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2826 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2827 GenUnsafeGet(
2828 invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2829 }
2830
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2831 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2832 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2833 }
2834
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2835 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2836 GenUnsafeGet(
2837 invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
2838 }
2839
VisitJdkUnsafeGetReference(HInvoke * invoke)2840 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2841 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2842 }
2843
VisitJdkUnsafeGetReference(HInvoke * invoke)2844 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2845 GenUnsafeGet(
2846 invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2847 }
2848
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2849 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2850 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2851 }
2852
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2853 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2854 GenUnsafeGet(
2855 invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2856 }
2857
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2858 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2859 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2860 }
2861
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2862 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2863 GenUnsafeGet(
2864 invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
2865 }
2866
VisitJdkUnsafeGetByte(HInvoke * invoke)2867 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2868 CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
2869 }
2870
VisitJdkUnsafeGetByte(HInvoke * invoke)2871 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2872 GenUnsafeGet(
2873 invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false);
2874 }
2875
GenerateIntrinsicSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,std::memory_order order,bool atomic,vixl32::Register base,vixl32::Register offset,Location value,Location maybe_temp,Location maybe_temp2,Location maybe_temp3)2876 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
2877 DataType::Type type,
2878 std::memory_order order,
2879 bool atomic,
2880 vixl32::Register base,
2881 vixl32::Register offset,
2882 Location value,
2883 Location maybe_temp,
2884 Location maybe_temp2,
2885 Location maybe_temp3) {
2886 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2887 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
2888 DCHECK(release_barrier || order == std::memory_order_relaxed);
2889 DCHECK(atomic || order == std::memory_order_relaxed);
2890
2891 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2892 if (release_barrier) {
2893 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
2894 }
2895 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2896 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2897 vixl32::Register temp = temps.Acquire();
2898 __ Mov(temp, RegisterFrom(value));
2899 assembler->PoisonHeapReference(temp);
2900 value = LocationFrom(temp);
2901 }
2902 MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
2903 if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
2904 const vixl32::Register temp_reg = temps.Acquire();
2905 __ Add(temp_reg, base, offset);
2906 address = MemOperand(temp_reg);
2907 }
2908 switch (type) {
2909 case DataType::Type::kBool:
2910 case DataType::Type::kInt8:
2911 __ Strb(RegisterFrom(value), address);
2912 break;
2913 case DataType::Type::kUint16:
2914 case DataType::Type::kInt16:
2915 __ Strh(RegisterFrom(value), address);
2916 break;
2917 case DataType::Type::kReference:
2918 case DataType::Type::kInt32:
2919 __ Str(RegisterFrom(value), address);
2920 break;
2921 case DataType::Type::kInt64:
2922 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2923 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2924 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2925 vixl32::Label loop;
2926 __ Bind(&loop);
2927 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2928 __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2929 __ Cmp(lo_tmp, 0);
2930 __ B(ne, &loop);
2931 } else {
2932 __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2933 }
2934 break;
2935 case DataType::Type::kFloat32:
2936 __ Vstr(SRegisterFrom(value), address);
2937 break;
2938 case DataType::Type::kFloat64:
2939 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2940 vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2941 vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2942 vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2943 vixl32::Label loop;
2944 __ Bind(&loop);
2945 __ Ldrexd(lo_tmp, hi_tmp, address); // Ignore the retrieved value.
2946 __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2947 __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2948 __ Cmp(strexd_tmp, 0);
2949 __ B(ne, &loop);
2950 } else {
2951 __ Vstr(DRegisterFrom(value), address);
2952 }
2953 break;
2954 default:
2955 LOG(FATAL) << "Unexpected type " << type;
2956 UNREACHABLE();
2957 }
2958 if (seq_cst_barrier) {
2959 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2960 }
2961 }
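// Barrier placement used by GenerateIntrinsicSet(), summarized:
//
//   memory_order_relaxed:  store
//   memory_order_release:  barrier (kAnyStore); store
//   memory_order_seq_cst:  barrier (kAnyStore); store; barrier (kAnyAny)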
2962
CreateUnsafePutLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen,DataType::Type type,bool atomic)2963 static void CreateUnsafePutLocations(HInvoke* invoke,
2964 CodeGeneratorARMVIXL* codegen,
2965 DataType::Type type,
2966 bool atomic) {
2967 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2968 LocationSummary* locations =
2969 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2970 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2971 locations->SetInAt(1, Location::RequiresRegister());
2972 locations->SetInAt(2, Location::RequiresRegister());
2973 locations->SetInAt(3, Location::RequiresRegister());
2974
2975 if (type == DataType::Type::kInt64) {
2976 // Potentially need temps for ldrexd-strexd loop.
2977 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2978 locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
2979 locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
2980 }
2981 } else if (type == DataType::Type::kReference) {
2982 // Temp for card-marking.
2983 locations->AddTemp(Location::RequiresRegister()); // Temp.
2984 }
2985 }
2986
GenUnsafePut(HInvoke * invoke,DataType::Type type,std::memory_order order,bool atomic,CodeGeneratorARMVIXL * codegen)2987 static void GenUnsafePut(HInvoke* invoke,
2988 DataType::Type type,
2989 std::memory_order order,
2990 bool atomic,
2991 CodeGeneratorARMVIXL* codegen) {
2992 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2993
2994 LocationSummary* locations = invoke->GetLocations();
2995 vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer.
2996 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
2997 Location value = locations->InAt(3);
2998 Location maybe_temp = Location::NoLocation();
2999 Location maybe_temp2 = Location::NoLocation();
3000 if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3001 maybe_temp = locations->GetTemp(0);
3002 maybe_temp2 = locations->GetTemp(1);
3003 }
3004
3005 GenerateIntrinsicSet(codegen,
3006 type,
3007 order,
3008 atomic,
3009 base,
3010 offset,
3011 value,
3012 maybe_temp,
3013 maybe_temp2,
3014 /*maybe_temp3=*/ Location::NoLocation());
3015
3016 if (type == DataType::Type::kReference) {
3017 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3018 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3019 vixl32::Register card = temps.Acquire();
3020 bool value_can_be_null = true; // TODO: Worth finding out this information?
3021 codegen->MaybeMarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3022 }
3023 }
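// For example, Unsafe.putOrderedInt()/putIntRelease() compiles to a release store (sketch):
//
//   dmb ish                    // kAnyStore.
//   str value, [base, offset]
//
// and the volatile variant additionally emits a trailing kAnyAny barrier. Reference stores are
// followed by the conditional card mark above.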
3024
VisitUnsafePut(HInvoke * invoke)3025 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3026 VisitJdkUnsafePut(invoke);
3027 }
3028
VisitUnsafePut(HInvoke * invoke)3029 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3030 VisitJdkUnsafePut(invoke);
3031 }
3032
VisitUnsafePutOrdered(HInvoke * invoke)3033 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3034 VisitJdkUnsafePutOrdered(invoke);
3035 }
3036
VisitUnsafePutOrdered(HInvoke * invoke)3037 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3038 VisitJdkUnsafePutOrdered(invoke);
3039 }
3040
VisitUnsafePutVolatile(HInvoke * invoke)3041 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3042 VisitJdkUnsafePutVolatile(invoke);
3043 }
3044
VisitUnsafePutVolatile(HInvoke * invoke)3045 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3046 VisitJdkUnsafePutVolatile(invoke);
3047 }
VisitUnsafePutObject(HInvoke * invoke)3048 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3049 VisitJdkUnsafePutReference(invoke);
3050 }
3051
VisitUnsafePutObject(HInvoke * invoke)3052 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3053 VisitJdkUnsafePutReference(invoke);
3054 }
3055
VisitUnsafePutObjectOrdered(HInvoke * invoke)3056 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3057 VisitJdkUnsafePutObjectOrdered(invoke);
3058 }
3059
VisitUnsafePutObjectOrdered(HInvoke * invoke)3060 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3061 VisitJdkUnsafePutObjectOrdered(invoke);
3062 }
3063
VisitUnsafePutObjectVolatile(HInvoke * invoke)3064 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3065 VisitJdkUnsafePutReferenceVolatile(invoke);
3066 }
3067
VisitUnsafePutObjectVolatile(HInvoke * invoke)3068 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3069 VisitJdkUnsafePutReferenceVolatile(invoke);
3070 }
3071
VisitUnsafePutLong(HInvoke * invoke)3072 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3073 VisitJdkUnsafePutLong(invoke);
3074 }
3075
VisitUnsafePutLong(HInvoke * invoke)3076 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3077 VisitJdkUnsafePutLong(invoke);
3078 }
3079
VisitUnsafePutLongOrdered(HInvoke * invoke)3080 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3081 VisitJdkUnsafePutLongOrdered(invoke);
3082 }
3083
VisitUnsafePutLongOrdered(HInvoke * invoke)3084 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3085 VisitJdkUnsafePutLongOrdered(invoke);
3086 }
3087
VisitUnsafePutLongVolatile(HInvoke * invoke)3088 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3089 VisitJdkUnsafePutLongVolatile(invoke);
3090 }
3091
VisitUnsafePutLongVolatile(HInvoke * invoke)3092 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3093 VisitJdkUnsafePutLongVolatile(invoke);
3094 }
3095
VisitUnsafePutByte(HInvoke * invoke)3096 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3097 VisitJdkUnsafePutByte(invoke);
3098 }
3099
VisitUnsafePutByte(HInvoke * invoke)3100 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3101 VisitJdkUnsafePutByte(invoke);
3102 }
3103
VisitJdkUnsafePut(HInvoke * invoke)3104 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3105 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3106 }
3107
VisitJdkUnsafePut(HInvoke * invoke)3108 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3109 GenUnsafePut(invoke,
3110 DataType::Type::kInt32,
3111 std::memory_order_relaxed,
3112 /*atomic=*/ false,
3113 codegen_);
3114 }
3115
VisitJdkUnsafePutByte(HInvoke * invoke)3116 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3117 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
3118 }
3119
VisitJdkUnsafePutByte(HInvoke * invoke)3120 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3121 GenUnsafePut(invoke,
3122 DataType::Type::kInt8,
3123 std::memory_order_relaxed,
3124 /*atomic=*/ false,
3125 codegen_);
3126 }
3127
VisitJdkUnsafePutOrdered(HInvoke * invoke)3128 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3129 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3130 }
3131
VisitJdkUnsafePutOrdered(HInvoke * invoke)3132 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3133 GenUnsafePut(invoke,
3134 DataType::Type::kInt32,
3135 std::memory_order_release,
3136 /*atomic=*/ true,
3137 codegen_);
3138 }
3139
VisitJdkUnsafePutVolatile(HInvoke * invoke)3140 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3141 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3142 }
3143
VisitJdkUnsafePutVolatile(HInvoke * invoke)3144 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3145 GenUnsafePut(invoke,
3146 DataType::Type::kInt32,
3147 std::memory_order_seq_cst,
3148 /*atomic=*/ true,
3149 codegen_);
3150 }
3151
VisitJdkUnsafePutRelease(HInvoke * invoke)3152 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3153 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3154 }
3155
VisitJdkUnsafePutRelease(HInvoke * invoke)3156 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3157 GenUnsafePut(invoke,
3158 DataType::Type::kInt32,
3159 std::memory_order_release,
3160 /*atomic=*/ true,
3161 codegen_);
3162 }
3163
VisitJdkUnsafePutReference(HInvoke * invoke)3164 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3165 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3166 }
3167
VisitJdkUnsafePutReference(HInvoke * invoke)3168 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3169 GenUnsafePut(invoke,
3170 DataType::Type::kReference,
3171 std::memory_order_relaxed,
3172 /*atomic=*/ false,
3173 codegen_);
3174 }
3175
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3176 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3177 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3178 }
3179
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)3180 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3181 GenUnsafePut(invoke,
3182 DataType::Type::kReference,
3183 std::memory_order_release,
3184 /*atomic=*/ true,
3185 codegen_);
3186 }
3187
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3188 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3189 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3190 }
3191
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)3192 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3193 GenUnsafePut(invoke,
3194 DataType::Type::kReference,
3195 std::memory_order_seq_cst,
3196 /*atomic=*/ true,
3197 codegen_);
3198 }
3199
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3200 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3201 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3202 }
3203
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)3204 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3205 GenUnsafePut(invoke,
3206 DataType::Type::kReference,
3207 std::memory_order_release,
3208 /*atomic=*/ true,
3209 codegen_);
3210 }
3211
VisitJdkUnsafePutLong(HInvoke * invoke)3212 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3213 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3214 }
3215
VisitJdkUnsafePutLong(HInvoke * invoke)3216 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3217 GenUnsafePut(invoke,
3218 DataType::Type::kInt64,
3219 std::memory_order_relaxed,
3220 /*atomic=*/ false,
3221 codegen_);
3222 }
3223
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3224 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3225 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3226 }
3227
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)3228 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3229 GenUnsafePut(invoke,
3230 DataType::Type::kInt64,
3231 std::memory_order_release,
3232 /*atomic=*/ true,
3233 codegen_);
3234 }
3235
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3236 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3237 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3238 }
3239
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)3240 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3241 GenUnsafePut(invoke,
3242 DataType::Type::kInt64,
3243 std::memory_order_seq_cst,
3244 /*atomic=*/ true,
3245 codegen_);
3246 }
3247
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3248 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3249 CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3250 }
3251
VisitJdkUnsafePutLongRelease(HInvoke * invoke)3252 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3253 GenUnsafePut(invoke,
3254 DataType::Type::kInt64,
3255 std::memory_order_release,
3256 /*atomic=*/ true,
3257 codegen_);
3258 }
3259
EmitLoadExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,Location old_value)3260 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3261 DataType::Type type,
3262 vixl32::Register ptr,
3263 Location old_value) {
3264 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3265 switch (type) {
3266 case DataType::Type::kBool:
3267 case DataType::Type::kInt8:
3268 __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3269 break;
3270 case DataType::Type::kUint16:
3271 case DataType::Type::kInt16:
3272 __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3273 break;
3274 case DataType::Type::kInt32:
3275 case DataType::Type::kReference:
3276 __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3277 break;
3278 case DataType::Type::kInt64:
3279 __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3280 break;
3281 default:
3282 LOG(FATAL) << "Unexpected type: " << type;
3283 UNREACHABLE();
3284 }
3285 switch (type) {
3286 case DataType::Type::kInt8:
3287 __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3288 break;
3289 case DataType::Type::kInt16:
3290 __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3291 break;
3292 case DataType::Type::kReference:
3293 assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3294 break;
3295 default:
3296 break;
3297 }
3298 }
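// Note: LDREXB/LDREXH zero-extend the loaded value, so the SXTB/SXTH above restore the sign
// for kInt8/kInt16, matching what a plain LDRSB/LDRSH would have produced.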
3299
EmitStoreExclusive(CodeGeneratorARMVIXL * codegen,DataType::Type type,vixl32::Register ptr,vixl32::Register store_result,Location new_value)3300 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3301 DataType::Type type,
3302 vixl32::Register ptr,
3303 vixl32::Register store_result,
3304 Location new_value) {
3305 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3306 if (type == DataType::Type::kReference) {
3307 assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3308 }
3309 switch (type) {
3310 case DataType::Type::kBool:
3311 case DataType::Type::kInt8:
3312 __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3313 break;
3314 case DataType::Type::kUint16:
3315 case DataType::Type::kInt16:
3316 __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3317 break;
3318 case DataType::Type::kInt32:
3319 case DataType::Type::kReference:
3320 __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3321 break;
3322 case DataType::Type::kInt64:
3323 __ Strexd(
3324 store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3325 break;
3326 default:
3327 LOG(FATAL) << "Unexpected type: " << type;
3328 UNREACHABLE();
3329 }
3330 if (type == DataType::Type::kReference) {
3331 assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3332 }
3333 }
3334
GenerateCompareAndSet(CodeGeneratorARMVIXL * codegen,DataType::Type type,bool strong,vixl32::Label * cmp_failure,bool cmp_failure_is_far_target,vixl32::Register ptr,Location expected,Location new_value,Location old_value,vixl32::Register store_result,vixl32::Register success)3335 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3336 DataType::Type type,
3337 bool strong,
3338 vixl32::Label* cmp_failure,
3339 bool cmp_failure_is_far_target,
3340 vixl32::Register ptr,
3341 Location expected,
3342 Location new_value,
3343 Location old_value,
3344 vixl32::Register store_result,
3345 vixl32::Register success) {
3346 // For kReference, `expected` shall be a register pair when called from the read barrier
3347 // slow path, holding both the original `expected` and the unmarked old value from the
3348 // main path's CAS attempt, i.e. the value that matched `expected` only after marking.
3349 // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
3350 if (type == DataType::Type::kInt64) {
3351 DCHECK(expected.IsRegisterPair());
3352 DCHECK(new_value.IsRegisterPair());
3353 DCHECK(old_value.IsRegisterPair());
3354 } else {
3355 DCHECK(expected.IsRegister() ||
3356 (type == DataType::Type::kReference && expected.IsRegisterPair()));
3357 DCHECK(new_value.IsRegister());
3358 DCHECK(old_value.IsRegister());
3359 // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
3360 DCHECK(!expected.Contains(LocationFrom(store_result)));
3361 }
3362
3363 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3364
3365 // do {
3366 // old_value = [ptr]; // Load exclusive.
3367 // if (old_value != expected) goto cmp_failure;
3368 // store_result = failed([ptr] <- new_value); // Store exclusive.
3369 // } while (strong && store_result);
3370 //
3371 // If `success` is a valid register, there are additional instructions in the above code
3372 // to report success with value 1 and failure with value 0 in that register.
3373
3374 vixl32::Label loop_head;
3375 if (strong) {
3376 __ Bind(&loop_head);
3377 }
3378 EmitLoadExclusive(codegen, type, ptr, old_value);
3379 // We do not need to initialize the failure code for comparison failure if the
3380 // branch goes to the read barrier slow path that clobbers `success` anyway.
3381 bool init_failure_for_cmp =
3382 success.IsValid() &&
3383 !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister());
3384 // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3385 // is essentially free, so prepare the failure value here if we can.
3386 bool init_failure_for_cmp_early =
3387 init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3388 if (init_failure_for_cmp_early) {
3389 __ Mov(success, 0); // Indicate failure if the comparison fails.
3390 }
3391 if (type == DataType::Type::kInt64) {
3392 __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3393 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3394 __ it(eq);
3395 __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3396 } else if (expected.IsRegisterPair()) {
3397 DCHECK_EQ(type, DataType::Type::kReference);
3398 DCHECK(!expected.Contains(old_value));
3399 // Check if the loaded value matches any of the two registers in `expected`.
3400 __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3401 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3402 __ it(ne);
3403 __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3404 } else {
3405 __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3406 }
3407 if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3408 __ Mov(LeaveFlags, success, 0); // Indicate failure if the comparison fails.
3409 }
3410 __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3411 EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3412 if (strong) {
3413 // Instruction scheduling: Loading a constant between STREX* and using its result
3414 // is essentially free, so prepare the success value here if needed and possible.
3415 if (success.IsValid() && !success.Is(store_result)) {
3416 __ Mov(success, 1); // Indicate success if the store succeeds.
3417 }
3418 __ Cmp(store_result, 0);
3419 if (success.IsValid() && success.Is(store_result)) {
3420 __ Mov(LeaveFlags, success, 1); // Indicate success if the store succeeds.
3421 }
3422 __ B(ne, &loop_head, /*is_far_target=*/ false);
3423 } else {
3424 // Weak CAS (VarHandle.weakCompareAndSet variants) must always report success or failure.
3425 DCHECK(success.IsValid());
3426 // Flip the `store_result` to indicate success by 1 and failure by 0.
3427 __ Eor(success, store_result, 1);
3428 }
3429 }
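// Note: STREX/STREXD write 0 to `store_result` on success and 1 on failure, hence the
// `Cmp(store_result, 0)` retry check for strong CAS and the `success = store_result ^ 1`
// result computation for weak CAS.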
3430
3431 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3432 public:
ReadBarrierCasSlowPathARMVIXL(HInvoke * invoke,bool strong,vixl32::Register base,vixl32::Register offset,vixl32::Register expected,vixl32::Register new_value,vixl32::Register old_value,vixl32::Register old_value_temp,vixl32::Register store_result,vixl32::Register success,CodeGeneratorARMVIXL * arm_codegen)3433 explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3434 bool strong,
3435 vixl32::Register base,
3436 vixl32::Register offset,
3437 vixl32::Register expected,
3438 vixl32::Register new_value,
3439 vixl32::Register old_value,
3440 vixl32::Register old_value_temp,
3441 vixl32::Register store_result,
3442 vixl32::Register success,
3443 CodeGeneratorARMVIXL* arm_codegen)
3444 : SlowPathCodeARMVIXL(invoke),
3445 strong_(strong),
3446 base_(base),
3447 offset_(offset),
3448 expected_(expected),
3449 new_value_(new_value),
3450 old_value_(old_value),
3451 old_value_temp_(old_value_temp),
3452 store_result_(store_result),
3453 success_(success),
3454 mark_old_value_slow_path_(nullptr),
3455 update_old_value_slow_path_(nullptr) {
3456 if (!kUseBakerReadBarrier) {
3457 // We need to add the slow path now; it is too late to add it when emitting slow path code.
3458 mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3459 invoke,
3460 Location::RegisterLocation(old_value_temp.GetCode()),
3461 Location::RegisterLocation(old_value.GetCode()),
3462 Location::RegisterLocation(base.GetCode()),
3463 /*offset=*/ 0u,
3464 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3465 if (!success.IsValid()) {
3466 update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3467 invoke,
3468 Location::RegisterLocation(old_value.GetCode()),
3469 Location::RegisterLocation(old_value_temp.GetCode()),
3470 Location::RegisterLocation(base.GetCode()),
3471 /*offset=*/ 0u,
3472 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3473 }
3474 }
3475 }
3476
GetDescription() const3477 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3478
EmitNativeCode(CodeGenerator * codegen)3479 void EmitNativeCode(CodeGenerator* codegen) override {
3480 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3481 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3482 __ Bind(GetEntryLabel());
3483
3484 // Mark the `old_value_` from the main path and compare with `expected_`.
3485 if (kUseBakerReadBarrier) {
3486 DCHECK(mark_old_value_slow_path_ == nullptr);
3487 arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3488 } else {
3489 DCHECK(mark_old_value_slow_path_ != nullptr);
3490 __ B(mark_old_value_slow_path_->GetEntryLabel());
3491 __ Bind(mark_old_value_slow_path_->GetExitLabel());
3492 }
3493 __ Cmp(old_value_temp_, expected_);
3494 if (success_.IsValid()) {
3495 __ Mov(LeaveFlags, success_, 0); // Indicate failure if we take the branch out.
3496 } else {
3497 // In case of failure, update the `old_value_` with the marked reference.
3498 ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3499 __ it(ne);
3500 __ mov(ne, old_value_, old_value_temp_);
3501 }
3502 __ B(ne, GetExitLabel());
3503
3504 // The old value we have read did not match `expected` (which is always a to-space
3505 // reference) but after the read barrier the marked to-space value matched, so the
3506 // old value must be a from-space reference to the same object. Do the same CAS loop
3507 // as the main path but check for both `expected` and the unmarked old value
3508 // representing the to-space and from-space references for the same object.
3509
3510 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3511 vixl32::Register tmp_ptr = temps.Acquire();
3512
3513 // Recalculate the `tmp_ptr` clobbered above.
3514 __ Add(tmp_ptr, base_, offset_);
3515
3516 vixl32::Label mark_old_value;
3517 GenerateCompareAndSet(arm_codegen,
3518 DataType::Type::kReference,
3519 strong_,
3520 /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3521 /*cmp_failure_is_far_target=*/ success_.IsValid(),
3522 tmp_ptr,
3523 /*expected=*/ LocationFrom(expected_, old_value_),
3524 /*new_value=*/ LocationFrom(new_value_),
3525 /*old_value=*/ LocationFrom(old_value_temp_),
3526 store_result_,
3527 success_);
3528 if (!success_.IsValid()) {
3529 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3530 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3531 __ Mov(old_value_, expected_);
3532 }
3533
3534 __ B(GetExitLabel());
3535
3536 if (!success_.IsValid()) {
3537 __ Bind(&mark_old_value);
3538 if (kUseBakerReadBarrier) {
3539 DCHECK(update_old_value_slow_path_ == nullptr);
3540 arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3541 } else {
3542 // Note: We could redirect the `cmp_failure` above directly to the entry label and bind
3543 // the exit label in the main path, but the main path would need to access the
3544 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3545 DCHECK(update_old_value_slow_path_ != nullptr);
3546 __ B(update_old_value_slow_path_->GetEntryLabel());
3547 __ Bind(update_old_value_slow_path_->GetExitLabel());
3548 }
3549 __ B(GetExitLabel());
3550 }
3551 }
3552
3553 private:
3554 bool strong_;
3555 vixl32::Register base_;
3556 vixl32::Register offset_;
3557 vixl32::Register expected_;
3558 vixl32::Register new_value_;
3559 vixl32::Register old_value_;
3560 vixl32::Register old_value_temp_;
3561 vixl32::Register store_result_;
3562 vixl32::Register success_;
3563 SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3564 SlowPathCodeARMVIXL* update_old_value_slow_path_;
3565 };
3566
CreateUnsafeCASLocations(HInvoke * invoke,CodeGeneratorARMVIXL * codegen)3567 static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3568 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
3569 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3570 LocationSummary* locations =
3571 new (allocator) LocationSummary(invoke,
3572 can_call
3573 ? LocationSummary::kCallOnSlowPath
3574 : LocationSummary::kNoCall,
3575 kIntrinsified);
3576 if (can_call && kUseBakerReadBarrier) {
3577 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3578 }
3579 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3580 locations->SetInAt(1, Location::RequiresRegister());
3581 locations->SetInAt(2, Location::RequiresRegister());
3582 locations->SetInAt(3, Location::RequiresRegister());
3583 locations->SetInAt(4, Location::RequiresRegister());
3584
3585 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3586
3587 // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3588 // this is also used for card-marking, and possibly for read barrier.
3589 locations->AddTemp(Location::RequiresRegister());
3590 }
3591
GenUnsafeCas(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)3592 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3593 DCHECK_NE(type, DataType::Type::kInt64);
3594
3595 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3596 LocationSummary* locations = invoke->GetLocations();
3597
3598 vixl32::Register out = OutputRegister(invoke); // Boolean result.
3599 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
3600 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
3601 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
3602 vixl32::Register new_value = InputRegisterAt(invoke, 4); // New value.
3603
3604 vixl32::Register tmp = RegisterFrom(locations->GetTemp(0)); // Temporary.
3605
3606 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3607 vixl32::Register tmp_ptr = temps.Acquire();
3608
3609 if (type == DataType::Type::kReference) {
3610 // Mark the card for the object, assuming the new value is stored. In the worst case we
3611 // mark an unchanged object and the next GC scans the receiver for nothing.
3612 bool value_can_be_null = true; // TODO: Worth finding out this information?
3613 codegen->MaybeMarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3614 }
3615
3616 vixl32::Label exit_loop_label;
3617 vixl32::Label* exit_loop = &exit_loop_label;
3618 vixl32::Label* cmp_failure = &exit_loop_label;
3619
3620 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3621 // If marking, check if the stored reference is a from-space reference to the same
3622 // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3623 ReadBarrierCasSlowPathARMVIXL* slow_path =
3624 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3625 invoke,
3626 /*strong=*/ true,
3627 base,
3628 offset,
3629 expected,
3630 new_value,
3631 /*old_value=*/ tmp,
3632 /*old_value_temp=*/ out,
3633 /*store_result=*/ out,
3634 /*success=*/ out,
3635 codegen);
3636 codegen->AddSlowPath(slow_path);
3637 exit_loop = slow_path->GetExitLabel();
3638 cmp_failure = slow_path->GetEntryLabel();
3639 }
3640
3641 // Unsafe CAS operations have std::memory_order_seq_cst semantics.
3642 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3643 __ Add(tmp_ptr, base, offset);
3644 GenerateCompareAndSet(codegen,
3645 type,
3646 /*strong=*/ true,
3647 cmp_failure,
3648 /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3649 tmp_ptr,
3650 /*expected=*/ LocationFrom(expected), // TODO: Int64
3651 /*new_value=*/ LocationFrom(new_value), // TODO: Int64
3652 /*old_value=*/ LocationFrom(tmp), // TODO: Int64
3653 /*store_result=*/ tmp,
3654 /*success=*/ out);
3655 __ Bind(exit_loop);
3656 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3657
3658 if (type == DataType::Type::kReference) {
3659 codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3660 }
3661 }
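// Informal summary of GenUnsafeCas() above (sketch): Unsafe compare-and-set is sequentially
// consistent, so the exclusive-monitor loop is bracketed by full barriers.
//
//   mark_gc_card(base, new_value);   // References only; a spurious mark is harmless.
//   dmb ish                          // kAnyAny.
//   tmp_ptr = base + offset;
//   success = compare_and_set(tmp_ptr, expected, new_value);   // GenerateCompareAndSet().
//   dmb ish                          // kAnyAny.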
3662
VisitUnsafeCASInt(HInvoke * invoke)3663 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3664 VisitJdkUnsafeCASInt(invoke);
3665 }
VisitUnsafeCASObject(HInvoke * invoke)3666 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3667 VisitJdkUnsafeCASObject(invoke);
3668 }
3669
VisitJdkUnsafeCASInt(HInvoke * invoke)3670 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3671 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3672 VisitJdkUnsafeCompareAndSetInt(invoke);
3673 }
VisitJdkUnsafeCASObject(HInvoke * invoke)3674 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3675 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3676 VisitJdkUnsafeCompareAndSetReference(invoke);
3677 }
3678
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3679 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3680 CreateUnsafeCASLocations(invoke, codegen_);
3681 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3682 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3683 // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3684 if (codegen_->EmitNonBakerReadBarrier()) {
3685 return;
3686 }
3687
3688 CreateUnsafeCASLocations(invoke, codegen_);
3689 }
3690
VisitUnsafeCASInt(HInvoke * invoke)3691 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3692 VisitJdkUnsafeCASInt(invoke);
3693 }
VisitUnsafeCASObject(HInvoke * invoke)3694 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3695 VisitJdkUnsafeCASObject(invoke);
3696 }
3697
VisitJdkUnsafeCASInt(HInvoke * invoke)3698 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3699 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3700 VisitJdkUnsafeCompareAndSetInt(invoke);
3701 }
VisitJdkUnsafeCASObject(HInvoke * invoke)3702 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3703 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3704 VisitJdkUnsafeCompareAndSetReference(invoke);
3705 }
3706
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)3707 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3708 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3709 }
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)3710 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3711 // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3712 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3713
3714 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3715 }
3716
3717 enum class GetAndUpdateOp {
3718 kSet,
3719 kAdd,
3720 kAddWithByteSwap,
3721 kAnd,
3722 kOr,
3723 kXor
3724 };
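// Informal mapping of GetAndUpdateOp to the Java-level operations handled below (the byte-swap
// variant is an assumption based on its use for VarHandle views with non-native byte order):
//   kSet              -> getAndSet
//   kAdd              -> getAndAdd (integral and floating-point)
//   kAddWithByteSwap  -> getAndAdd on byte-swapped views
//   kAnd / kOr / kXor -> getAndBitwiseAnd / getAndBitwiseOr / getAndBitwiseXor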
3725
GenerateGetAndUpdate(CodeGeneratorARMVIXL * codegen,GetAndUpdateOp get_and_update_op,DataType::Type load_store_type,vixl32::Register ptr,Location arg,Location old_value,vixl32::Register store_result,Location maybe_temp,Location maybe_vreg_temp)3726 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3727 GetAndUpdateOp get_and_update_op,
3728 DataType::Type load_store_type,
3729 vixl32::Register ptr,
3730 Location arg,
3731 Location old_value,
3732 vixl32::Register store_result,
3733 Location maybe_temp,
3734 Location maybe_vreg_temp) {
3735 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3736
3737 Location loaded_value;
3738 Location new_value;
3739 switch (get_and_update_op) {
3740 case GetAndUpdateOp::kSet:
3741 loaded_value = old_value;
3742 new_value = arg;
3743 break;
3744 case GetAndUpdateOp::kAddWithByteSwap:
3745 if (old_value.IsRegisterPair()) {
3746 // To avoid register overlap when reversing bytes, load into temps.
3747 DCHECK(maybe_temp.IsRegisterPair());
3748 loaded_value = maybe_temp;
3749 new_value = loaded_value; // Use the same temporaries for the new value.
3750 break;
3751 }
3752 FALLTHROUGH_INTENDED;
3753 case GetAndUpdateOp::kAdd:
3754 if (old_value.IsFpuRegisterPair()) {
3755 DCHECK(maybe_temp.IsRegisterPair());
3756 loaded_value = maybe_temp;
3757 new_value = loaded_value; // Use the same temporaries for the new value.
3758 break;
3759 }
3760 if (old_value.IsFpuRegister()) {
3761 DCHECK(maybe_temp.IsRegister());
3762 loaded_value = maybe_temp;
3763 new_value = loaded_value; // Use the same temporary for the new value.
3764 break;
3765 }
3766 FALLTHROUGH_INTENDED;
3767 case GetAndUpdateOp::kAnd:
3768 case GetAndUpdateOp::kOr:
3769 case GetAndUpdateOp::kXor:
3770 loaded_value = old_value;
3771 new_value = maybe_temp;
3772 break;
3773 }
3774
3775 vixl32::Label loop_label;
3776 __ Bind(&loop_label);
3777 EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3778 switch (get_and_update_op) {
3779 case GetAndUpdateOp::kSet:
3780 break;
3781 case GetAndUpdateOp::kAddWithByteSwap:
3782 if (arg.IsFpuRegisterPair()) {
3783 GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3784 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3785 __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3786 __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum); // Swap low/high.
3787 } else if (arg.IsFpuRegister()) {
3788 GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3789 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
3790 __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3791 __ Vmov(RegisterFrom(new_value), sum);
3792 } else if (load_store_type == DataType::Type::kInt64) {
3793 GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3794 // Swap low/high registers for the addition results.
3795 __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3796 __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3797 } else {
3798 GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3799 __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3800 }
3801 if (load_store_type == DataType::Type::kInt64) {
3802 // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3803 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3804 } else {
3805 GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3806 }
3807 break;
3808 case GetAndUpdateOp::kAdd:
3809 if (arg.IsFpuRegisterPair()) {
3810 vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3811 vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3812 __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3813 __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
3814 __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
3815 } else if (arg.IsFpuRegister()) {
3816 vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
3817 vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp); // The temporary is a pair.
3818 __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
3819 __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
3820 __ Vmov(RegisterFrom(new_value), sum);
3821 } else if (load_store_type == DataType::Type::kInt64) {
3822 __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3823 __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3824 } else {
3825 __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3826 }
3827 break;
3828 case GetAndUpdateOp::kAnd:
3829 if (load_store_type == DataType::Type::kInt64) {
3830 __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3831 __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3832 } else {
3833 __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3834 }
3835 break;
3836 case GetAndUpdateOp::kOr:
3837 if (load_store_type == DataType::Type::kInt64) {
3838 __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3839 __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3840 } else {
3841 __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3842 }
3843 break;
3844 case GetAndUpdateOp::kXor:
3845 if (load_store_type == DataType::Type::kInt64) {
3846 __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3847 __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3848 } else {
3849 __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3850 }
3851 break;
3852 }
3853 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
3854 __ Cmp(store_result, 0);
3855 __ B(ne, &loop_label);
3856 }
3857
3858 static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
3859 CodeGeneratorARMVIXL* codegen,
3860 DataType::Type type,
3861 GetAndUpdateOp get_and_update_op) {
3862 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
3863 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3864 LocationSummary* locations =
3865 new (allocator) LocationSummary(invoke,
3866 can_call
3867 ? LocationSummary::kCallOnSlowPath
3868 : LocationSummary::kNoCall,
3869 kIntrinsified);
3870 if (can_call && kUseBakerReadBarrier) {
3871 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3872 }
3873 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
3874 locations->SetInAt(1, Location::RequiresRegister());
3875 locations->SetInAt(2, Location::RequiresRegister());
3876 locations->SetInAt(3, Location::RequiresRegister());
3877
3878 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3879
3880 size_t num_temps = 1u; // We always need `tmp_ptr`.
3881 if (get_and_update_op == GetAndUpdateOp::kAdd) {
3882 // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
3883 num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
3884 if (type == DataType::Type::kInt64) {
3885 // There are enough available registers, but the register allocator can fail to allocate
3886 // them correctly because single-register inputs and temps can block register pairs.
3887 // To work around this limitation, use a fixed register pair for both the output as well
3888 // as the offset which is not needed anymore after the address calculation.
3889 // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.)
3890 locations->SetInAt(2, LocationFrom(r0, r1));
3891 locations->UpdateOut(LocationFrom(r0, r1));
3892 }
3893 }
3894 locations->AddRegisterTemps(num_temps);
3895 }
3896
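// Generates the body of the Unsafe get-and-update intrinsics. As a rough Java-level model
// (not the emitted code), Unsafe.getAndAddInt(obj, offset, delta) behaves like
//   int old;
//   do {
//     old = getIntVolatile(obj, offset);
//   } while (!compareAndSwapInt(obj, offset, old, old + delta));
//   return old;
// whereas the code below emits a single LDREX/STREX loop with a full barrier on each side.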
3897 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
3898 CodeGeneratorARMVIXL* codegen,
3899 DataType::Type type,
3900 GetAndUpdateOp get_and_update_op) {
3901 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3902 LocationSummary* locations = invoke->GetLocations();
3903
3904 Location out = locations->Out(); // Result.
3905 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
3906 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
3907 Location arg = locations->InAt(3); // New value or addend.
3908 vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
3909 Location maybe_temp = Location::NoLocation();
3910 if (get_and_update_op == GetAndUpdateOp::kAdd) {
3911 maybe_temp = (type == DataType::Type::kInt64)
3912 ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
3913 : locations->GetTemp(1);
3914 }
3915
3916 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3917 vixl32::Register temp = temps.Acquire();
3918
3919 if (type == DataType::Type::kReference) {
3920 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3921 // Mark the card for the object since a new reference value is about to be stored.
3922 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
3923 vixl32::Register card = tmp_ptr; // Use the `tmp_ptr` also as the `card` temporary.
3924 codegen->MaybeMarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null);
3925 }
3926
3927 // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring
3928 // a barrier before and after the raw load/store-exclusive operation.
3929
3930 __ Add(tmp_ptr, base, Operand(offset));
3931 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3932 GenerateGetAndUpdate(codegen,
3933 get_and_update_op,
3934 type,
3935 tmp_ptr,
3936 arg,
3937 /*old_value=*/ out,
3938 /*store_result=*/ temp,
3939 maybe_temp,
3940 /*maybe_vreg_temp=*/ Location::NoLocation());
3941 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3942
3943 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3944 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3945 if (kUseBakerReadBarrier) {
3946 codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out));
3947 } else {
3948 codegen->GenerateReadBarrierSlow(
3949 invoke,
3950 out,
3951 out,
3952 Location::RegisterLocation(base.GetCode()),
3953 /*offset=*/ 0u,
3954 /*index=*/ Location::RegisterLocation(offset.GetCode()));
3955 }
3956 }
3957 }
3958
3959 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3960 VisitJdkUnsafeGetAndAddInt(invoke);
3961 }
3962 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3963 VisitJdkUnsafeGetAndAddLong(invoke);
3964 }
3965 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3966 VisitJdkUnsafeGetAndSetInt(invoke);
3967 }
3968 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3969 VisitJdkUnsafeGetAndSetLong(invoke);
3970 }
3971 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3972 VisitJdkUnsafeGetAndSetReference(invoke);
3973 }
3974
3975 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3976 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
3977 }
3978 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3979 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
3980 }
3981 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3982 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
3983 }
3984 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3985 CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
3986 }
3987 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3988 CreateUnsafeGetAndUpdateLocations(
3989 invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
3990 }
3991
3992 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3993 VisitJdkUnsafeGetAndAddInt(invoke);
3994 }
3995 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3996 VisitJdkUnsafeGetAndAddLong(invoke);
3997 }
3998 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3999 VisitJdkUnsafeGetAndSetInt(invoke);
4000 }
4001 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4002 VisitJdkUnsafeGetAndSetLong(invoke);
4003 }
4004 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4005 VisitJdkUnsafeGetAndSetReference(invoke);
4006 }
4007
4008 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4009 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4010 }
4011 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4012 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4013 }
4014 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4015 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4016 }
4017 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4018 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4019 }
4020 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4021 GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4022 }
4023
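// Slow path shared by the VarHandle intrinsics. Besides the usual runtime call, it
// implements the byte array view check (EmitByteArrayViewCode(), defined later); when
// that check passes with native byte order, control returns to the fast path through
// `native_byte_order_label_`.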
4024 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
4025 public:
4026 VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
4027 : IntrinsicSlowPathARMVIXL(invoke),
4028 order_(order),
4029 atomic_(false),
4030 return_success_(false),
4031 strong_(false),
4032 get_and_update_op_(GetAndUpdateOp::kAdd) {
4033 }
4034
4035 vixl32::Label* GetByteArrayViewCheckLabel() {
4036 return &byte_array_view_check_label_;
4037 }
4038
4039 vixl32::Label* GetNativeByteOrderLabel() {
4040 return &native_byte_order_label_;
4041 }
4042
4043 void SetAtomic(bool atomic) {
4044 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
4045 GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
4046 atomic_ = atomic;
4047 }
4048
4049 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4050 if (return_success) {
4051 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4052 } else {
4053 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4054 }
4055 return_success_ = return_success;
4056 strong_ = strong;
4057 }
4058
4059 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4060 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4061 get_and_update_op_ = get_and_update_op;
4062 }
4063
4064 void EmitNativeCode(CodeGenerator* codegen_in) override {
4065 if (GetByteArrayViewCheckLabel()->IsReferenced()) {
4066 EmitByteArrayViewCode(codegen_in);
4067 }
4068 IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
4069 }
4070
4071 private:
4072 HInvoke* GetInvoke() const {
4073 return GetInstruction()->AsInvoke();
4074 }
4075
4076 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4077 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4078 }
4079
4080 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4081
4082 vixl32::Label byte_array_view_check_label_;
4083 vixl32::Label native_byte_order_label_;
4084 // Shared parameter for all VarHandle intrinsics.
4085 std::memory_order order_;
4086 // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
4087 bool atomic_;
4088 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4089 bool return_success_;
4090 bool strong_;
4091 // Extra argument for GenerateVarHandleGetAndUpdate().
4092 GetAndUpdateOp get_and_update_op_;
4093 };
4094
4095 // Generate subtype check without read barriers.
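// Rough equivalent of the emitted check (illustrative only):
//   for (k = object->klass; ; k = k->super_class) {
//     if (k == type) break;               // success
//     if (k == nullptr) goto slow_path;   // possible false negative without read barrier
//   }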
4096 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
4097 SlowPathCodeARMVIXL* slow_path,
4098 vixl32::Register object,
4099 vixl32::Register type,
4100 bool object_can_be_null = true) {
4101 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4102
4103 const MemberOffset class_offset = mirror::Object::ClassOffset();
4104 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4105
4106 vixl32::Label success;
4107 if (object_can_be_null) {
4108 __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
4109 }
4110
4111 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4112 vixl32::Register temp = temps.Acquire();
4113
4114 __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
4115 assembler->MaybeUnpoisonHeapReference(temp);
4116 vixl32::Label loop;
4117 __ Bind(&loop);
4118 __ Cmp(type, temp);
4119 __ B(eq, &success, /*is_far_target=*/ false);
4120 __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
4121 assembler->MaybeUnpoisonHeapReference(temp);
4122 __ Cmp(temp, 0);
4123 __ B(eq, slow_path->GetEntryLabel());
4124 __ B(&loop);
4125 __ Bind(&success);
4126 }
4127
4128 // Check access mode and the primitive type from VarHandle.varType.
4129 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4130 // check without read barrier, so it can have false negatives which we handle in the slow path.
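// (For example, if varType is an interface such as Runnable, the superclass walk never
// finds it and we conservatively take the slow path even for a valid argument.)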
4131 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4132 CodeGeneratorARMVIXL* codegen,
4133 SlowPathCodeARMVIXL* slow_path,
4134 DataType::Type type) {
4135 mirror::VarHandle::AccessMode access_mode =
4136 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4137 Primitive::Type primitive_type = DataTypeToPrimitive(type);
4138
4139 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4140 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4141
4142 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4143 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4144 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4145
4146 // Use the temporary register reserved for offset. It is not used yet at this point.
4147 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4148 vixl32::Register var_type_no_rb =
4149 RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
4150
4151 // Check that the operation is permitted and check the primitive type of varhandle.varType.
4152 // We do not need a read barrier when loading the reference only to read a constant
4153 // primitive field through it. Use LDRD to load the two fields together.
4154 {
4155 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4156 vixl32::Register temp2 = temps.Acquire();
4157 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4158 __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
4159 assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
4160 __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
4161 __ B(eq, slow_path->GetEntryLabel());
4162 __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4163 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4164 __ B(ne, slow_path->GetEntryLabel());
4165 }
4166
4167 if (type == DataType::Type::kReference) {
4168 // Check reference arguments against the varType.
4169 // False negatives due to varType being an interface or array type
4170 // or due to the missing read barrier are handled by the slow path.
4171 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4172 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4173 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4174 HInstruction* arg = invoke->InputAt(arg_index);
4175 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4176 if (!arg->IsNullConstant()) {
4177 vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
4178 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4179 }
4180 }
4181 }
4182 }
4183
4184 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4185 CodeGeneratorARMVIXL* codegen,
4186 SlowPathCodeARMVIXL* slow_path) {
4187 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4188 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4189
4190 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4191
4192 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4193 vixl32::Register temp = temps.Acquire();
4194
4195 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4196 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4197 __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4198 __ Cmp(temp, 0);
4199 __ B(ne, slow_path->GetEntryLabel());
4200 }
4201
4202 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4203 CodeGeneratorARMVIXL* codegen,
4204 SlowPathCodeARMVIXL* slow_path) {
4205 VarHandleOptimizations optimizations(invoke);
4206 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4207 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4208 vixl32::Register object = InputRegisterAt(invoke, 1);
4209
4210 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4211 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4212
4213 // Null-check the object.
4214 if (!optimizations.GetSkipObjectNullCheck()) {
4215 __ Cmp(object, 0);
4216 __ B(eq, slow_path->GetEntryLabel());
4217 }
4218
4219 if (!optimizations.GetUseKnownImageVarHandle()) {
4220 // Use the first temporary register, whether it's for the declaring class or the offset.
4221 // It is not used yet at this point.
4222 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4223
4224 // Check that the VarHandle references an instance field by checking that
4225 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4226 // type compatibility check with the source object's type, which will fail for null.
4227 {
4228 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4229 vixl32::Register temp2 = temps.Acquire();
4230 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4231 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4232 assembler->MaybeUnpoisonHeapReference(temp);
4233 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4234 __ Cmp(temp2, 0);
4235 __ B(ne, slow_path->GetEntryLabel());
4236 }
4237
4238 // Check that the object has the correct type.
4239 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4240 GenerateSubTypeObjectCheckNoReadBarrier(
4241 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4242 }
4243 }
4244
4245 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4246 CodeGeneratorARMVIXL* codegen,
4247 VarHandleSlowPathARMVIXL* slow_path) {
4248 VarHandleOptimizations optimizations(invoke);
4249 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4250 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4251 vixl32::Register object = InputRegisterAt(invoke, 1);
4252 vixl32::Register index = InputRegisterAt(invoke, 2);
4253 DataType::Type value_type =
4254 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4255 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4256
4257 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4258 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4259 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4260 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4261 const MemberOffset class_offset = mirror::Object::ClassOffset();
4262 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4263
4264 // Null-check the object.
4265 if (!optimizations.GetSkipObjectNullCheck()) {
4266 __ Cmp(object, 0);
4267 __ B(eq, slow_path->GetEntryLabel());
4268 }
4269
4270 // Use the offset temporary register. It is not used yet at this point.
4271 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4272
4273 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4274 vixl32::Register temp2 = temps.Acquire();
4275
4276 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4277 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4278 // coordinateType0 shall not be null but we do not explicitly verify that.
4279 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4280 __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4281 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4282 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4283 __ Cmp(temp2, 0);
4284 __ B(eq, slow_path->GetEntryLabel());
4285
4286 // Check object class against componentType0.
4287 //
4288 // This is an exact check and we defer other cases to the runtime. This includes
4289 // conversion to array of superclass references, which is valid but subsequently
4290 // requires all update operations to check that the value can indeed be stored.
4291 // We do not want to perform such extra checks in the intrinsified code.
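// (For example, a VarHandle created for Object[] may legally be used on a String[]
// instance, but stores would then need an ArrayStoreException-style element check,
// so we leave that case to the runtime.)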
4292 //
4293 // We do this check without read barrier, so there can be false negatives which we
4294 // defer to the slow path. There shall be no false negatives for array classes in the
4295 // boot image (including Object[] and primitive arrays) because they are non-movable.
4296 __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
4297 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4298 __ Cmp(temp, temp2);
4299 __ B(ne, slow_path->GetEntryLabel());
4300
4301 // Check that the coordinateType0 is an array type. We do not need a read barrier
4302 // for loading constant reference fields (or chains of them) for comparison with null,
4303 // nor for finally loading a constant primitive field (primitive type) below.
4304 __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
4305 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4306 __ Cmp(temp2, 0);
4307 __ B(eq, slow_path->GetEntryLabel());
4308
4309 // Check that the array component type matches the primitive type.
4310 // For types other than `kPrimNot`, `kPrimByte` and `kPrimBoolean`, a mismatch here
4311 // may still be a byte array view, so we check for that in the slow path.
4312 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4313 // so we cannot emit that if we're JITting without boot image.
4314 bool boot_image_available =
4315 codegen->GetCompilerOptions().IsBootImage() ||
4316 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4317 bool can_be_view =
4318 ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4319 boot_image_available;
4320 vixl32::Label* slow_path_label =
4321 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4322 __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4323 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4324 __ B(ne, slow_path_label);
4325
4326 // Check for array index out of bounds.
4327 __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4328 __ Cmp(index, temp);
4329 __ B(hs, slow_path->GetEntryLabel());
4330 }
4331
4332 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4333 CodeGeneratorARMVIXL* codegen,
4334 VarHandleSlowPathARMVIXL* slow_path) {
4335 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4336 if (expected_coordinates_count == 0u) {
4337 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4338 } else if (expected_coordinates_count == 1u) {
4339 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4340 } else {
4341 DCHECK_EQ(expected_coordinates_count, 2u);
4342 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4343 }
4344 }
4345
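// Emits all VarHandle checks and returns the slow path, or nullptr when the checks
// can be elided entirely for a VarHandle known to be in the boot image.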
4346 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4347 CodeGeneratorARMVIXL* codegen,
4348 std::memory_order order,
4349 DataType::Type type) {
4350 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4351 VarHandleOptimizations optimizations(invoke);
4352 if (optimizations.GetUseKnownImageVarHandle()) {
4353 DCHECK_NE(expected_coordinates_count, 2u);
4354 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4355 return nullptr;
4356 }
4357 }
4358
4359 VarHandleSlowPathARMVIXL* slow_path =
4360 new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4361 codegen->AddSlowPath(slow_path);
4362
4363 if (!optimizations.GetUseKnownImageVarHandle()) {
4364 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4365 }
4366 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4367
4368 return slow_path;
4369 }
4370
4371 struct VarHandleTarget {
4372 vixl32::Register object; // The object holding the value to operate on.
4373 vixl32::Register offset; // The offset of the value to operate on.
4374 };
4375
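// For static fields `target.object` is a temporary that will receive the declaring class;
// for instance fields and arrays it is the coordinate passed as argument 1.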
4376 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4377 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4378 LocationSummary* locations = invoke->GetLocations();
4379
4380 VarHandleTarget target;
4381 // The temporary allocated for loading the offset.
4382 target.offset = RegisterFrom(locations->GetTemp(0u));
4383 // The reference to the object that holds the value to operate on.
4384 target.object = (expected_coordinates_count == 0u)
4385 ? RegisterFrom(locations->GetTemp(1u))
4386 : InputRegisterAt(invoke, 1);
4387 return target;
4388 }
4389
4390 static void GenerateVarHandleTarget(HInvoke* invoke,
4391 const VarHandleTarget& target,
4392 CodeGeneratorARMVIXL* codegen) {
4393 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4394 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4395 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4396
4397 if (expected_coordinates_count <= 1u) {
4398 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4399 ScopedObjectAccess soa(Thread::Current());
4400 ArtField* target_field = GetBootImageVarHandleField(invoke);
4401 if (expected_coordinates_count == 0u) {
4402 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4403 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4404 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4405 codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4406 } else {
4407 codegen->LoadTypeForBootImageIntrinsic(
4408 target.object,
4409 TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4410 }
4411 }
4412 __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4413 } else {
4414 // For static fields, we need to fill the `target.object` with the declaring class,
4415 // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
4416 // we do not need the declaring class and can forget the `ArtField*` as soon as
4417 // `target.offset` is loaded, so we use `target.offset` itself to hold the `ArtField*`.
4418 vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4419
4420 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4421 const MemberOffset offset_offset = ArtField::OffsetOffset();
4422
4423 // Load the ArtField*, the offset and, if needed, declaring class.
4424 __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
4425 __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
4426 if (expected_coordinates_count == 0u) {
4427 codegen->GenerateGcRootFieldLoad(invoke,
4428 LocationFrom(target.object),
4429 field,
4430 ArtField::DeclaringClassOffset().Int32Value(),
4431 codegen->GetCompilerReadBarrierOption());
4432 }
4433 }
4434 } else {
4435 DCHECK_EQ(expected_coordinates_count, 2u);
4436 DataType::Type value_type =
4437 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4438 uint32_t size_shift = DataType::SizeShift(value_type);
4439 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4440
4441 vixl32::Register index = InputRegisterAt(invoke, 2);
4442 vixl32::Register shifted_index = index;
4443 if (size_shift != 0u) {
4444 shifted_index = target.offset;
4445 __ Lsl(shifted_index, index, size_shift);
4446 }
4447 __ Add(target.offset, shifted_index, data_offset.Int32Value());
4448 }
4449 }
4450
4451 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4452 CodeGeneratorARMVIXL* codegen) {
4453 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4454 DataType::Type return_type = invoke->GetType();
4455
4456 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4457 LocationSummary* locations =
4458 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4459 locations->SetInAt(0, Location::RequiresRegister());
4460 // Require coordinates in registers. These are the object holding the value
4461 // to operate on (except for static fields) and index (for arrays and views).
4462 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4463 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4464 }
4465 if (return_type != DataType::Type::kVoid) {
4466 if (DataType::IsFloatingPointType(return_type)) {
4467 locations->SetOut(Location::RequiresFpuRegister());
4468 } else {
4469 locations->SetOut(Location::RequiresRegister());
4470 }
4471 }
4472 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4473 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4474 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4475 HInstruction* arg = invoke->InputAt(arg_index);
4476 if (DataType::IsFloatingPointType(arg->GetType())) {
4477 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4478 } else {
4479 locations->SetInAt(arg_index, Location::RequiresRegister());
4480 }
4481 }
4482
4483 // Add a temporary for offset.
4484 if (codegen->EmitNonBakerReadBarrier() &&
4485 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4486 // To preserve the offset value across the non-Baker read barrier slow path
4487 // for loading the declaring class, use a fixed callee-save register.
4488 constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4489 locations->AddTemp(Location::RegisterLocation(first_callee_save));
4490 } else {
4491 locations->AddTemp(Location::RequiresRegister());
4492 }
4493 if (expected_coordinates_count == 0u) {
4494 // Add a temporary to hold the declaring class.
4495 locations->AddTemp(Location::RequiresRegister());
4496 }
4497
4498 return locations;
4499 }
4500
4501 static void CreateVarHandleGetLocations(HInvoke* invoke,
4502 CodeGeneratorARMVIXL* codegen,
4503 bool atomic) {
4504 VarHandleOptimizations optimizations(invoke);
4505 if (optimizations.GetDoNotIntrinsify()) {
4506 return;
4507 }
4508
4509 if (codegen->EmitNonBakerReadBarrier() &&
4510 invoke->GetType() == DataType::Type::kReference &&
4511 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4512 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4513 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4514 // the passed reference and reloads it from the field. This gets the memory visibility
4515 // wrong for Acquire/Volatile operations. b/173104084
4516 return;
4517 }
4518
4519 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4520
4521 DataType::Type type = invoke->GetType();
4522 if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4523 // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4524 // declaring class (if present) and offset temporary.
4525 DCHECK_EQ(locations->GetTempCount(),
4526 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4527 locations->AddRegisterTemps(3u - locations->GetTempCount());
4528 }
4529 }
4530
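// `byte_swap` is set only by the byte array view slow path (see EmitByteArrayViewCode()),
// where the loaded value must be byte-reversed to match the view's non-native byte order.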
4531 static void GenerateVarHandleGet(HInvoke* invoke,
4532 CodeGeneratorARMVIXL* codegen,
4533 std::memory_order order,
4534 bool atomic,
4535 bool byte_swap = false) {
4536 DataType::Type type = invoke->GetType();
4537 DCHECK_NE(type, DataType::Type::kVoid);
4538
4539 LocationSummary* locations = invoke->GetLocations();
4540 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4541 Location out = locations->Out();
4542
4543 VarHandleTarget target = GetVarHandleTarget(invoke);
4544 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4545 if (!byte_swap) {
4546 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4547 GenerateVarHandleTarget(invoke, target, codegen);
4548 if (slow_path != nullptr) {
4549 slow_path->SetAtomic(atomic);
4550 __ Bind(slow_path->GetNativeByteOrderLabel());
4551 }
4552 }
4553
4554 Location maybe_temp = Location::NoLocation();
4555 Location maybe_temp2 = Location::NoLocation();
4556 Location maybe_temp3 = Location::NoLocation();
4557 if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
4558 // Reuse the offset temporary.
4559 maybe_temp = LocationFrom(target.offset);
4560 } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4561 // Reuse the offset temporary and declaring class (if present).
4562 // The address shall be constructed in the scratch register before they are clobbered.
4563 maybe_temp = LocationFrom(target.offset);
4564 DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4565 if (type == DataType::Type::kFloat64) {
4566 maybe_temp2 = locations->GetTemp(1);
4567 maybe_temp3 = locations->GetTemp(2);
4568 }
4569 }
4570
4571 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4572 Location loaded_value = out;
4573 DataType::Type load_type = type;
4574 if (byte_swap) {
4575 if (type == DataType::Type::kFloat64) {
4576 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4577 // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4578 loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4579 maybe_temp2 = Location::NoLocation();
4580 maybe_temp3 = Location::NoLocation();
4581 } else {
4582 // Use the offset temporary and the scratch register.
4583 loaded_value = LocationFrom(target.offset, temps.Acquire());
4584 }
4585 load_type = DataType::Type::kInt64;
4586 } else if (type == DataType::Type::kFloat32) {
4587 // Reuse the offset temporary.
4588 loaded_value = LocationFrom(target.offset);
4589 load_type = DataType::Type::kInt32;
4590 } else if (type == DataType::Type::kInt64) {
4591 // Swap the high and low registers and reverse the bytes in each after the load.
4592 loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4593 }
4594 }
4595
4596 GenerateIntrinsicGet(invoke,
4597 codegen,
4598 load_type,
4599 order,
4600 atomic,
4601 target.object,
4602 target.offset,
4603 loaded_value,
4604 maybe_temp,
4605 maybe_temp2,
4606 maybe_temp3);
4607 if (byte_swap) {
4608 if (type == DataType::Type::kInt64) {
4609 GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4610 } else {
4611 GenerateReverseBytes(assembler, type, loaded_value, out);
4612 }
4613 }
4614
4615 if (slow_path != nullptr) {
4616 DCHECK(!byte_swap);
4617 __ Bind(slow_path->GetExitLabel());
4618 }
4619 }
4620
4621 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4622 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4623 }
4624
4625 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4626 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4627 }
4628
4629 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4630 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4631 }
4632
4633 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4634 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4635 }
4636
4637 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4638 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4639 }
4640
4641 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4642 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4643 }
4644
4645 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4646 CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4647 }
4648
4649 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4650 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4651 }
4652
4653 static void CreateVarHandleSetLocations(HInvoke* invoke,
4654 CodeGeneratorARMVIXL* codegen,
4655 bool atomic) {
4656 VarHandleOptimizations optimizations(invoke);
4657 if (optimizations.GetDoNotIntrinsify()) {
4658 return;
4659 }
4660
4661 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4662
4663 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4664 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4665 if (DataType::Is64BitType(value_type)) {
4666 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4667 DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4668 HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4669 bool has_reverse_bytes_slow_path =
4670 (expected_coordinates_count == 2u) &&
4671 !IsZeroBitPattern(arg);
4672 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4673 // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4674 // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4675 // We can reuse the offset temporary and declaring class (if present).
4676 size_t temps_needed = has_reverse_bytes_slow_path
4677 ? 4u
4678 : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4679 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4680 } else if (has_reverse_bytes_slow_path) {
4681 // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4682 // We can reuse the offset temporary.
4683 DCHECK_EQ(locations->GetTempCount(), 1u);
4684 locations->AddTemp(Location::RequiresRegister());
4685 }
4686 }
4687 }
4688
4689 static void GenerateVarHandleSet(HInvoke* invoke,
4690 CodeGeneratorARMVIXL* codegen,
4691 std::memory_order order,
4692 bool atomic,
4693 bool byte_swap = false) {
4694 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4695 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4696
4697 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4698 LocationSummary* locations = invoke->GetLocations();
4699 Location value = locations->InAt(value_index);
4700
4701 VarHandleTarget target = GetVarHandleTarget(invoke);
4702 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4703 if (!byte_swap) {
4704 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4705 GenerateVarHandleTarget(invoke, target, codegen);
4706 if (slow_path != nullptr) {
4707 slow_path->SetAtomic(atomic);
4708 __ Bind(slow_path->GetNativeByteOrderLabel());
4709 }
4710 }
4711
4712 Location maybe_temp = Location::NoLocation();
4713 Location maybe_temp2 = Location::NoLocation();
4714 Location maybe_temp3 = Location::NoLocation();
4715 if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4716 // Reuse the offset temporary and declaring class (if present).
4717 // The address shall be constructed in the scratch register before they are clobbered.
4718 maybe_temp = locations->GetTemp(0);
4719 maybe_temp2 = locations->GetTemp(1);
4720 if (value_type == DataType::Type::kFloat64) {
4721 maybe_temp3 = locations->GetTemp(2);
4722 }
4723 }
4724
4725 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4726 if (byte_swap) {
4727 if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4728 // Calculate the address in scratch register, so that we can use the offset temporary.
4729 vixl32::Register base = temps.Acquire();
4730 __ Add(base, target.object, target.offset);
4731 target.object = base;
4732 target.offset = vixl32::Register();
4733 }
4734 Location original_value = value;
4735 if (DataType::Is64BitType(value_type)) {
4736 size_t temp_start = 0u;
4737 if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4738 // Clear `maybe_temp3` which was initialized above for Float64.
4739 DCHECK_IMPLIES(value_type == DataType::Type::kFloat64,
4740 maybe_temp3.Equals(locations->GetTemp(2)));
4741 maybe_temp3 = Location::NoLocation();
4742 temp_start = 2u;
4743 }
4744 value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4745 RegisterFrom(locations->GetTemp(temp_start + 1u)));
4746 if (value_type == DataType::Type::kFloat64) {
4747 __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4748 GenerateReverseBytesInPlaceForEachWord(assembler, value);
4749 value_type = DataType::Type::kInt64;
4750 } else {
4751 GenerateReverseBytes(assembler, value_type, original_value, value);
4752 }
4753 } else if (value_type == DataType::Type::kFloat32) {
4754 value = locations->GetTemp(0); // Use the offset temporary which was freed above.
4755 __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4756 GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4757 value_type = DataType::Type::kInt32;
4758 } else {
4759 value = LocationFrom(temps.Acquire());
4760 GenerateReverseBytes(assembler, value_type, original_value, value);
4761 }
4762 }
4763
4764 GenerateIntrinsicSet(codegen,
4765 value_type,
4766 order,
4767 atomic,
4768 target.object,
4769 target.offset,
4770 value,
4771 maybe_temp,
4772 maybe_temp2,
4773 maybe_temp3);
4774
4775 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4776 // Reuse the offset temporary for MarkGCCard.
4777 vixl32::Register temp = target.offset;
4778 vixl32::Register card = temps.Acquire();
4779 vixl32::Register value_reg = RegisterFrom(value);
4780 codegen->MaybeMarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
4781 }
4782
4783 if (slow_path != nullptr) {
4784 DCHECK(!byte_swap);
4785 __ Bind(slow_path->GetExitLabel());
4786 }
4787 }
4788
4789 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4790 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4791 }
4792
4793 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4794 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4795 }
4796
4797 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4798 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4799 }
4800
4801 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4802 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4803 }
4804
4805 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4806 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4807 }
4808
4809 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4810 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
4811 }
4812
4813 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4814 CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4815 }
4816
4817 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4818 // ARM store-release instructions are implicitly sequentially consistent.
4819 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4820 }
4821
4822 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4823 CodeGeneratorARMVIXL* codegen,
4824 bool return_success) {
4825 VarHandleOptimizations optimizations(invoke);
4826 if (optimizations.GetDoNotIntrinsify()) {
4827 return;
4828 }
4829
4830 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4831 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4832 if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
4833 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4834 // the passed reference and reloads it from the field. This breaks the read barriers
4835 // in slow path in different ways. The marked old value may not actually be a to-space
4836 // reference to the same object as `old_value`, breaking slow path assumptions. And
4837 // for CompareAndExchange, marking the old value after comparison failure may actually
4838 // return the reference to `expected`, erroneously indicating success even though we
4839 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4840 return;
4841 }
4842
4843 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4844
4845 if (codegen->EmitNonBakerReadBarrier()) {
4846 // We need callee-save registers for both the class object and offset instead of
4847 // the temporaries reserved in CreateVarHandleCommonLocations().
4848 static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
4849 constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4850 constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
4851 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4852 DCHECK_EQ(locations->GetTempCount(), 2u);
4853 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4854 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4855 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4856 } else {
4857 DCHECK_EQ(locations->GetTempCount(), 1u);
4858 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4859 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4860 }
4861 }
4862
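// Floating-point temp budget (see the register assignments in
// GenerateVarHandleCompareAndSetOrExchange() below): the expected and new values are each
// moved to core registers (two each for Float64, one each for Float32), one temp holds the
// store-exclusive result, and the exchange variants need additional temps for the old value
// (two for Float64, one for Float32).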
4863 if (DataType::IsFloatingPointType(value_type)) {
4864 // We can reuse the declaring class (if present) and offset temporary.
4865 DCHECK_EQ(locations->GetTempCount(),
4866 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4867 size_t temps_needed = (value_type == DataType::Type::kFloat64)
4868 ? (return_success ? 5u : 7u)
4869 : (return_success ? 3u : 4u);
4870 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4871 } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4872 // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
4873 DCHECK_EQ(locations->GetTempCount(), 1u);
4874 if (value_type == DataType::Type::kInt64) {
4875 // We would ideally add 4 temps for Int64, but that would run out of registers,
4876 // so we instead reverse the bytes in the actual argument registers and undo it at the end.
4877 } else {
4878 locations->AddRegisterTemps(2u);
4879 }
4880 }
4881 if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4882 // Add a temporary for store result, also used for the `old_value_temp` in slow path.
4883 locations->AddTemp(Location::RequiresRegister());
4884 }
4885 }
4886
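// Handles both flavors of the operation (illustrative mapping, not generated code):
//   compareAndSet / weakCompareAndSet*  -> return_success=true  (boolean result; "weak" may fail spuriously)
//   compareAndExchange*                 -> return_success=false, strong=true (returns the witnessed old value)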
4887 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4888 CodeGeneratorARMVIXL* codegen,
4889 std::memory_order order,
4890 bool return_success,
4891 bool strong,
4892 bool byte_swap = false) {
4893 DCHECK(return_success || strong);
4894
4895 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4896 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4897 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4898 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4899
4900 ArmVIXLAssembler* assembler = codegen->GetAssembler();
4901 LocationSummary* locations = invoke->GetLocations();
4902 Location expected = locations->InAt(expected_index);
4903 Location new_value = locations->InAt(new_value_index);
4904 Location out = locations->Out();
4905
4906 VarHandleTarget target = GetVarHandleTarget(invoke);
4907 VarHandleSlowPathARMVIXL* slow_path = nullptr;
4908 if (!byte_swap) {
4909 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4910 GenerateVarHandleTarget(invoke, target, codegen);
4911 if (slow_path != nullptr) {
4912 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4913 __ Bind(slow_path->GetNativeByteOrderLabel());
4914 }
4915 }
4916
4917 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
4918 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
4919 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
4920 DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
4921
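// Barrier scheme: a release (or seq_cst) operation needs a barrier before the exclusive
// access and an acquire (or seq_cst) operation needs one after it; seq_cst uses the
// stronger kAnyAny barrier on both sides.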
4922 if (release_barrier) {
4923 codegen->GenerateMemoryBarrier(
4924 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
4925 }
4926
4927 // Calculate the pointer to the value.
4928 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4929 vixl32::Register tmp_ptr = temps.Acquire();
4930 __ Add(tmp_ptr, target.object, target.offset);
4931
4932 // Move floating point values to temporaries and prepare output registers.
4933 // Note that float/double CAS uses bitwise comparison, rather than operator==.
4934 // Reuse the declaring class (if present) and offset temporary for non-reference types,
4935 // as the address has already been constructed in the scratch register. We are more careful
4936 // for references due to the read and write barriers; see below.
4937 Location old_value;
4938 vixl32::Register store_result;
4939 vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
4940 DataType::Type cas_type = value_type;
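// `cas_type` is the type used by the exclusive load/store loop below. The exclusive
// load/store instructions operate on core registers only, so float/double values are
// moved to core registers with VMOV and the CAS runs as Int32/Int64.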
4941 if (value_type == DataType::Type::kFloat64) {
4942 vixl32::DRegister expected_vreg = DRegisterFrom(expected);
4943 vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
4944 expected =
4945 LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
4946 new_value =
4947 LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
4948 store_result = RegisterFrom(locations->GetTemp(4));
4949 old_value = return_success
4950 ? LocationFrom(success, store_result)
4951 : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
4952 if (byte_swap) {
4953 __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
4954 __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
4955 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4956 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4957 } else {
4958 __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
4959 __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
4960 }
4961 cas_type = DataType::Type::kInt64;
4962 } else if (value_type == DataType::Type::kFloat32) {
4963 vixl32::SRegister expected_vreg = SRegisterFrom(expected);
4964 vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
4965 expected = locations->GetTemp(0);
4966 new_value = locations->GetTemp(1);
4967 store_result = RegisterFrom(locations->GetTemp(2));
4968 old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
4969 __ Vmov(RegisterFrom(expected), expected_vreg);
4970 __ Vmov(RegisterFrom(new_value), new_value_vreg);
4971 if (byte_swap) {
4972 GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
4973 GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
4974 }
4975 cas_type = DataType::Type::kInt32;
4976 } else if (value_type == DataType::Type::kInt64) {
4977 store_result = RegisterFrom(locations->GetTemp(0));
4978 old_value = return_success
4979 ? LocationFrom(success, store_result)
4980 // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
4981 : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
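// Reversing all 8 bytes of a 64-bit value is equivalent to swapping its two 32-bit
// halves and reversing the bytes within each half, which is what the register-pair
// swap plus the per-word reversal accomplish.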
4982 if (byte_swap) {
4983 // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
4984 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4985 expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
4986 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4987 new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
4988 }
4989 } else {
4990 // Use the last temp. For references with read barriers, this is an extra temporary
4991 // allocated to avoid overwriting the temporaries for the declaring class (if present)
4992 // and offset, as they are needed in the slow path. Otherwise, this is the offset
4993 // temporary, which also works for references without read barriers that need the
4994 // object register preserved for the write barrier.
4995 store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
4996 old_value = return_success ? LocationFrom(store_result) : out;
4997 if (byte_swap) {
4998 DCHECK_EQ(locations->GetTempCount(), 3u);
4999 Location original_expected = expected;
5000 Location original_new_value = new_value;
5001 expected = locations->GetTemp(0);
5002 new_value = locations->GetTemp(1);
5003 GenerateReverseBytes(assembler, value_type, original_expected, expected);
5004 GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
5005 }
5006 }
5007
5008 vixl32::Label exit_loop_label;
5009 vixl32::Label* exit_loop = &exit_loop_label;
5010 vixl32::Label* cmp_failure = &exit_loop_label;
5011
5012 if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5013 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5014 // reloaded old value for the subsequent CAS in the slow path. This must not clobber `old_value`.
5015 vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
5016 // The slow path store result must not clobber `old_value`.
5017 vixl32::Register slow_path_store_result = old_value_temp;
5018 ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
5019 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
5020 invoke,
5021 strong,
5022 target.object,
5023 target.offset,
5024 RegisterFrom(expected),
5025 RegisterFrom(new_value),
5026 RegisterFrom(old_value),
5027 old_value_temp,
5028 slow_path_store_result,
5029 success,
5030 codegen);
5031 codegen->AddSlowPath(rb_slow_path);
5032 exit_loop = rb_slow_path->GetExitLabel();
5033 cmp_failure = rb_slow_path->GetEntryLabel();
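// With read barriers, a comparison failure branches to the read barrier slow path entry
// instead of straight to the exit: the value read may be an unmarked from-space reference,
// so the slow path marks it before deciding whether the CAS really failed (and retries the
// store for a strong CAS).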
5034 }
5035
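// GenerateCompareAndSet() emits the exclusive-access retry loop (LDREX/STREX and their
// sized variants): load-exclusive the old value, branch to `cmp_failure` if it does not
// match `expected`, otherwise store-exclusive `new_value`, retrying on contention for a
// strong CAS.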
5036 GenerateCompareAndSet(codegen,
5037 cas_type,
5038 strong,
5039 cmp_failure,
5040 /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
5041 tmp_ptr,
5042 expected,
5043 new_value,
5044 old_value,
5045 store_result,
5046 success);
5047 __ Bind(exit_loop);
5048
5049 if (acquire_barrier) {
5050 codegen->GenerateMemoryBarrier(
5051 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5052 }
5053
5054 if (byte_swap && value_type == DataType::Type::kInt64) {
5055 // Undo the byte swapping in `expected` and `new_value`. We do not know
5056 // whether the values in these registers will be needed later.
5057 GenerateReverseBytesInPlaceForEachWord(assembler, expected);
5058 GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
5059 }
5060 if (!return_success) {
5061 if (byte_swap) {
5062 if (value_type == DataType::Type::kInt64) {
5063 GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5064 } else {
5065 GenerateReverseBytes(assembler, value_type, old_value, out);
5066 }
5067 } else if (value_type == DataType::Type::kFloat64) {
5068 __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5069 } else if (value_type == DataType::Type::kFloat32) {
5070 __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5071 }
5072 }
5073
5074 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5075 // Reuse the offset temporary and scratch register for MarkGCCard.
5076 vixl32::Register temp = target.offset;
5077 vixl32::Register card = tmp_ptr;
5078 // Mark card for object assuming new value is stored.
5079 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5080 codegen->MaybeMarkGCCard(
5081 temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
5082 }
5083
5084 if (slow_path != nullptr) {
5085 DCHECK(!byte_swap);
5086 __ Bind(slow_path->GetExitLabel());
5087 }
5088 }
5089
5090 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5091 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5092 }
5093
5094 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5095 GenerateVarHandleCompareAndSetOrExchange(
5096 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5097 }
5098
5099 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5100 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5101 }
5102
5103 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5104 GenerateVarHandleCompareAndSetOrExchange(
5105 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5106 }
5107
5108 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5109 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5110 }
5111
5112 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5113 GenerateVarHandleCompareAndSetOrExchange(
5114 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5115 }
5116
5117 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5118 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5119 }
5120
5121 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5122 GenerateVarHandleCompareAndSetOrExchange(
5123 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5124 }
5125
5126 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5127 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5128 }
5129
5130 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5131 GenerateVarHandleCompareAndSetOrExchange(
5132 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5133 }
5134
5135 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5136 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5137 }
5138
5139 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5140 GenerateVarHandleCompareAndSetOrExchange(
5141 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5142 }
5143
5144 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5145 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5146 }
5147
5148 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5149 GenerateVarHandleCompareAndSetOrExchange(
5150 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5151 }
5152
5153 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5154 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5155 }
5156
5157 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5158 GenerateVarHandleCompareAndSetOrExchange(
5159 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5160 }
5161
5162 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5163 CodeGeneratorARMVIXL* codegen,
5164 GetAndUpdateOp get_and_update_op) {
5165 VarHandleOptimizations optimizations(invoke);
5166 if (optimizations.GetDoNotIntrinsify()) {
5167 return;
5168 }
5169
5170 if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5171 // Unsupported for non-Baker read barriers because artReadBarrierSlow() ignores
5172 // the passed reference and reloads it from the field, thus seeing the new value
5173 // that we have just stored. (It also gets the memory visibility wrong.) b/173104084
5174 return;
5175 }
5176
5177 LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5178
5179 // We can reuse the declaring class (if present) and offset temporary, except for
5180 // non-Baker read barriers that need them for the slow path.
5181 DCHECK_EQ(locations->GetTempCount(),
5182 (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5183
5184 DataType::Type value_type = invoke->GetType();
5185 if (get_and_update_op == GetAndUpdateOp::kSet) {
5186 if (DataType::IsFloatingPointType(value_type)) {
5187 // Add temps needed to do the GenerateGetAndUpdate() with core registers.
5188 size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
5189 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5190 } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5191 // We need to preserve the declaring class (if present) and offset for read barrier
5192 // slow paths, so we must use a separate temporary for the exclusive store result.
5193 locations->AddTemp(Location::RequiresRegister());
5194 } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5195 // Add temps for the byte-reversed `arg` in the byte array view slow path.
5196 DCHECK_EQ(locations->GetTempCount(), 1u);
5197 locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
5198 }
5199 } else {
5200 // We need temporaries for the new value and exclusive store result.
5201 size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
5202 if (get_and_update_op != GetAndUpdateOp::kAdd &&
5203 GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5204 // Add temps for the byte-reversed `arg` in the byte array view slow path.
5205 if (value_type == DataType::Type::kInt64) {
5206 // We would ideally add 2 temps for Int64 but that would simply run out of registers,
5207 // so we instead need to reverse bytes in the actual argument and undo it at the end.
5208 } else {
5209 temps_needed += 1u;
5210 }
5211 }
5212 locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5213 if (DataType::IsFloatingPointType(value_type)) {
5214 // Note: This shall allocate a D register. There is no way to request an S register.
5215 locations->AddTemp(Location::RequiresFpuRegister());
5216 }
5217 }
5218 }
5219
5220 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5221 CodeGeneratorARMVIXL* codegen,
5222 GetAndUpdateOp get_and_update_op,
5223 std::memory_order order,
5224 bool byte_swap = false) {
5225 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5226 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5227
5228 ArmVIXLAssembler* assembler = codegen->GetAssembler();
5229 LocationSummary* locations = invoke->GetLocations();
5230 Location arg = locations->InAt(arg_index);
5231 Location out = locations->Out();
5232
5233 VarHandleTarget target = GetVarHandleTarget(invoke);
5234 VarHandleSlowPathARMVIXL* slow_path = nullptr;
5235 if (!byte_swap) {
5236 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5237 GenerateVarHandleTarget(invoke, target, codegen);
5238 if (slow_path != nullptr) {
5239 slow_path->SetGetAndUpdateOp(get_and_update_op);
5240 __ Bind(slow_path->GetNativeByteOrderLabel());
5241 }
5242 }
5243
5244 bool seq_cst_barrier = (order == std::memory_order_seq_cst);
5245 bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
5246 bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
5247 DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
5248
5249 if (release_barrier) {
5250 codegen->GenerateMemoryBarrier(
5251 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
5252 }
5253
5254 // Use the scratch register for the pointer to the target location.
5255 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5256 vixl32::Register tmp_ptr = temps.Acquire();
5257 __ Add(tmp_ptr, target.object, target.offset);
5258
5259 // Use the offset temporary for the exclusive store result.
5260 vixl32::Register store_result = target.offset;
5261
5262 // The load/store type is never floating point.
5263 DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
5264 ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5265 : value_type;
5266
5267 // Prepare register for old value and temporaries if any.
5268 Location old_value = out;
5269 Location maybe_temp = Location::NoLocation();
5270 Location maybe_vreg_temp = Location::NoLocation();
5271 if (get_and_update_op == GetAndUpdateOp::kSet) {
5272 // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5273 // rather than moving between core and FP registers in the loop.
5274 if (value_type == DataType::Type::kFloat64) {
5275 vixl32::DRegister arg_vreg = DRegisterFrom(arg);
5276 DCHECK_EQ(locations->GetTempCount(), 5u); // `store_result` and the four here.
5277 old_value =
5278 LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
5279 arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
5280 if (byte_swap) {
5281 __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
5282 GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5283 } else {
5284 __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
5285 }
5286 } else if (value_type == DataType::Type::kFloat32) {
5287 vixl32::SRegister arg_vreg = SRegisterFrom(arg);
5288 DCHECK_EQ(locations->GetTempCount(), 3u); // `store_result` and the two here.
5289 old_value = locations->GetTemp(1);
5290 arg = locations->GetTemp(2);
5291 __ Vmov(RegisterFrom(arg), arg_vreg);
5292 if (byte_swap) {
5293 GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
5294 }
5295 } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5296 if (kUseBakerReadBarrier) {
5297 // Load the old value initially to a temporary register.
5298 // We shall move it to `out` later with a read barrier.
5299 old_value = LocationFrom(store_result);
5300 store_result = RegisterFrom(out); // Use the `out` for the exclusive store result.
5301 } else {
5302 // The store_result is a separate temporary.
5303 DCHECK(!store_result.Is(target.object));
5304 DCHECK(!store_result.Is(target.offset));
5305 }
5306 } else if (byte_swap) {
5307 Location original_arg = arg;
5308 arg = locations->GetTemp(1);
5309 if (value_type == DataType::Type::kInt64) {
5310 arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
5311 // Swap the high/low regs and reverse the bytes in each after the load.
5312 old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5313 }
5314 GenerateReverseBytes(assembler, value_type, original_arg, arg);
5315 }
5316 } else {
5317 maybe_temp = DataType::Is64BitType(value_type)
5318 ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
5319 : locations->GetTemp(1);
5320 DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
5321 if (DataType::IsFloatingPointType(value_type)) {
5322 maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
5323 DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
5324 }
5325 if (byte_swap) {
5326 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5327 // We need to do the byte swapping in the CAS loop for GetAndAdd.
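// The addition must be performed on the native-order value while memory holds the
// byte-swapped representation, so the swap has to happen between the exclusive load
// and the add inside the loop rather than once outside it.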
5328 get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5329 } else if (value_type == DataType::Type::kInt64) {
5330 // Swap the high/low regs and reverse the bytes in each after the load.
5331 old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5332 // Due to lack of registers, reverse bytes in `arg` and undo that later.
5333 GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5334 arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
5335 } else {
5336 DCHECK(!DataType::IsFloatingPointType(value_type));
5337 Location original_arg = arg;
5338 arg = locations->GetTemp(2);
5339 DCHECK(!arg.Contains(LocationFrom(store_result)));
5340 GenerateReverseBytes(assembler, value_type, original_arg, arg);
5341 }
5342 }
5343 }
5344
5345 GenerateGetAndUpdate(codegen,
5346 get_and_update_op,
5347 load_store_type,
5348 tmp_ptr,
5349 arg,
5350 old_value,
5351 store_result,
5352 maybe_temp,
5353 maybe_vreg_temp);
5354
5355 if (acquire_barrier) {
5356 codegen->GenerateMemoryBarrier(
5357 seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5358 }
5359
5360 if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
5361 if (value_type == DataType::Type::kInt64) {
5362 GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5363 if (get_and_update_op != GetAndUpdateOp::kSet) {
5364 // Undo the byte swapping in `arg`. We do not know whether
5365 // the value in these registers will be needed later.
5366 GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5367 }
5368 } else {
5369 GenerateReverseBytes(assembler, value_type, old_value, out);
5370 }
5371 } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5372 DataType::IsFloatingPointType(value_type)) {
5373 if (value_type == DataType::Type::kFloat64) {
5374 __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5375 } else {
5376 __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5377 }
5378 } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5379 if (kUseBakerReadBarrier) {
5380 codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out),
5381 RegisterFrom(old_value));
5382 } else {
5383 codegen->GenerateReadBarrierSlow(
5384 invoke,
5385 Location::RegisterLocation(RegisterFrom(out).GetCode()),
5386 Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
5387 Location::RegisterLocation(target.object.GetCode()),
5388 /*offset=*/ 0u,
5389 /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5390 }
5391 }
5392
5393 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5394 // Reuse the offset temporary and scratch register for MarkGCCard.
5395 vixl32::Register temp = target.offset;
5396 vixl32::Register card = tmp_ptr;
5397 // Mark card for object assuming new value is stored.
5398 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5399 codegen->MaybeMarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
5400 }
5401
5402 if (slow_path != nullptr) {
5403 DCHECK(!byte_swap);
5404 __ Bind(slow_path->GetExitLabel());
5405 }
5406 }
5407
5408 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5409 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5410 }
5411
5412 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5413 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5414 }
5415
5416 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5417 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5418 }
5419
5420 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5421 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5422 }
5423
5424 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5425 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5426 }
5427
5428 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5429 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5430 }
5431
5432 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5433 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5434 }
5435
5436 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5437 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5438 }
5439
5440 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5441 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5442 }
5443
5444 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5445 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5446 }
5447
5448 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5449 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5450 }
5451
5452 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5453 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5454 }
5455
5456 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5457 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5458 }
5459
5460 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5461 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5462 }
5463
5464 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5465 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5466 }
5467
5468 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5469 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5470 }
5471
5472 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5473 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5474 }
5475
5476 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5477 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5478 }
5479
5480 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5481 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5482 }
5483
5484 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5485 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5486 }
5487
5488 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5489 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5490 }
5491
5492 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5493 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5494 }
5495
5496 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5497 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5498 }
5499
5500 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5501 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5502 }
5503
5504 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5505 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5506 }
5507
5508 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5509 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5510 }
5511
5512 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5513 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5514 }
5515
5516 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5517 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5518 }
5519
5520 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5521 CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5522 }
5523
5524 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5525 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5526 }
5527
5528 void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5529 DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
5530 CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
5531 ArmVIXLAssembler* assembler = codegen->GetAssembler();
5532 HInvoke* invoke = GetInvoke();
5533 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5534 DataType::Type value_type =
5535 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5536 DCHECK_NE(value_type, DataType::Type::kReference);
5537 size_t size = DataType::Size(value_type);
5538 DCHECK_GT(size, 1u);
5539 vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
5540 vixl32::Register varhandle = InputRegisterAt(invoke, 0);
5541 vixl32::Register object = InputRegisterAt(invoke, 1);
5542 vixl32::Register index = InputRegisterAt(invoke, 2);
5543
5544 MemberOffset class_offset = mirror::Object::ClassOffset();
5545 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5546 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5547 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5548
5549 __ Bind(GetByteArrayViewCheckLabel());
5550
5551 VarHandleTarget target = GetVarHandleTarget(invoke);
5552 {
5553 // Use the offset temporary register. It is not used yet at this point.
5554 vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
5555
5556 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5557 vixl32::Register temp2 = temps.Acquire();
5558
5559 // The main path checked that coordinateType0 is an array class that matches
5560 // the class of the actual coordinate argument but does not match the value type.
5561 // Check whether the `varhandle` references a ByteArrayViewVarHandle instance.
5562 __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
5563 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
5564 codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5565 __ Cmp(temp, temp2);
5566 __ B(ne, GetEntryLabel());
5567
5568 // Check for array index out of bounds.
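// The access is in bounds only if `length - index >= size`. The SUBS below computes
// `length - index` and the conditional CMP (skipped if the subtraction borrowed) checks
// it against `size`; either way the `lo` condition ends up set for out-of-bounds
// accesses, so a single branch to the runtime handles both cases.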
5569 __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
5570 if (!temp.IsLow()) {
5571 // Avoid using the 32-bit `cmp temp, #imm` in an IT block by loading `size` into `temp2`.
5572 __ Mov(temp2, size_operand);
5573 }
5574 __ Subs(temp, temp, index);
5575 {
5576 // Use ExactAssemblyScope here because we are using IT.
5577 ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
5578 2 * k16BitT32InstructionSizeInBytes);
5579 __ it(hs);
5580 if (temp.IsLow()) {
5581 __ cmp(hs, temp, size_operand);
5582 } else {
5583 __ cmp(hs, temp, temp2);
5584 }
5585 }
5586 __ B(lo, GetEntryLabel());
5587
5588 // Construct the target.
5589 __ Add(target.offset, index, data_offset.Int32Value()); // Note: `temp` cannot be used below.
5590
5591 // Alignment check. For unaligned access, go to the runtime.
5592 DCHECK(IsPowerOfTwo(size));
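// Since `size` is a power of two, TST with `size - 1` checks the low bits of the offset;
// a non-zero result means the access would be misaligned.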
5593 __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
5594 __ B(ne, GetEntryLabel());
5595
5596 // Byte order check. For native byte order, return to the main path.
5597 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
5598 HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
5599 if (IsZeroBitPattern(arg)) {
5600 // There is no reason to differentiate between native byte order and byte-swap
5601 // for setting a zero bit pattern. Just return to the main path.
5602 __ B(GetNativeByteOrderLabel());
5603 return;
5604 }
5605 }
5606 __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
5607 __ Cmp(temp2, 0);
5608 __ B(ne, GetNativeByteOrderLabel());
5609 }
5610
5611 switch (access_mode_template) {
5612 case mirror::VarHandle::AccessModeTemplate::kGet:
5613 GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5614 break;
5615 case mirror::VarHandle::AccessModeTemplate::kSet:
5616 GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5617 break;
5618 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5619 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5620 GenerateVarHandleCompareAndSetOrExchange(
5621 invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5622 break;
5623 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5624 GenerateVarHandleGetAndUpdate(
5625 invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5626 break;
5627 }
5628 __ B(GetExitLabel());
5629 }
5630
5631 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
5632 UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
5633 #undef MARK_UNIMPLEMENTED
5634
5635 UNREACHABLE_INTRINSICS(ARMVIXL)
5636
5637 #undef __
5638
5639 } // namespace arm
5640 } // namespace art
5641