/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/callee_save_frame_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "well_known_classes.h"

#include "aarch32/constants-aarch32.h"

namespace art HIDDEN {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
                                                   SlowPathCodeARMVIXL,
                                                   ArmVIXLAssembler>;

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitBakerReadBarrier());
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // The source range and destination pointer were initialized before entering the slow-path.
    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
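    // If the high word is non-zero, its CLZ is the result; otherwise the result is
    // 32 + CLZ(low word).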
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
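  // ARM has no count-trailing-zeros instruction; RBIT reverses the bit order so that a
  // following CLZ yields the number of trailing zeros of the original value.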

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* is_far_target= */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
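  // Copy the FP comparison flags from FPSCR to the APSR so the IT block below can test them.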
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
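  // If `addr` aliases `lo`, load the high word first so the address is not clobbered
  // before the second load.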
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus temporaries.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need an extra temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

// Forward declaration.
//
// The ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
// by the compiler for every C++ function, and if this function were inlined in
// IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit would be exceeded, resulting in a
// build failure. That is why the NO_INLINE attribute is used.
static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
                                                  HInvoke* invoke,
                                                  vixl32::Label* end,
                                                  vixl32::Label* different_compression);

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  const vixl32::Register str = InputRegisterAt(invoke, 0);
  const vixl32::Register arg = InputRegisterAt(invoke, 1);
  const vixl32::Register out = OutputRegister(invoke);

  const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
  // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);

  if (mirror::kUseStringCompression) {
    // Check that both strings use the same compression style; this comparison loop requires it.
    __ Eors(temp2, temp2, temp3);
    __ Lsrs(temp2, temp2, 1u);
    __ B(cs, &different_compression);
    // For string compression, calculate the number of bytes to compare (not chars).
    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.

    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(ne);
    __ add(ne, temp0, temp0, temp0);
  }


  GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);

  __ Bind(&end);

  if (can_slow_path) {
    __ Bind(slow_path->GetExitLabel());
  }
}

static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
                                        HInvoke* invoke,
                                        vixl32::Label* end,
                                        vixl32::Label* different_compression) {
  LocationSummary* locations = invoke->GetLocations();

  const vixl32::Register str = InputRegisterAt(invoke, 0);
  const vixl32::Register arg = InputRegisterAt(invoke, 1);
  const vixl32::Register out = OutputRegister(invoke);

  const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;

  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Store offset of string value in preparation for comparison loop.
  __ Mov(temp1, value_offset);

  // Assertions that must hold in order to compare multiple characters at a time.
  CHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment),
                "String data must be 8-byte aligned for unrolled CompareTo loop.");

  const unsigned char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());

  vixl32::Label find_char_diff_2nd_cmp;
  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
  __ Bind(&loop);
  vixl32::Register temp_reg = temps.Acquire();
  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff, /* is_far_target= */ false);
  __ Add(temp1, temp1, char_size * 2);

  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
  __ Add(temp1, temp1, char_size * 2);
  // With string compression, we have compared 8 bytes, otherwise 4 chars.
  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
  __ B(hi, &loop, /* is_far_target= */ false);
  __ B(end);

  __ Bind(&find_char_diff_2nd_cmp);
  if (mirror::kUseStringCompression) {
    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
    __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
  } else {
    // Without string compression, we can start treating temp0 as signed
    // and rely on the signed comparison below.
    __ Sub(temp0, temp0, 2);
  }

  // Find the single character difference.
  __ Bind(&find_char_diff);
  // Get the bit position of the first character that differs.
  __ Eor(temp1, temp2, temp_reg);
  __ Rbit(temp1, temp1);
  __ Clz(temp1, temp1);

  // temp0 = number of characters remaining to compare.
  // (Without string compression, it could be < 1 if a difference is found by the second CMP
  // in the comparison loop, and after the end of the shorter string data).

  // Without string compression (temp1 >> 4) = character where difference occurs between the last
  // two words compared, in the interval [0,1].
  // (0 for low half-word different, 1 for high half-word different).
  // With string compression, (temp1 >> 3) = byte where the difference occurs,
  // in the interval [0,3].
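  // For example, without compression, a difference only in the high half-word of the last
  // word compared gives temp1 in [16, 31], so (temp1 >> 4) = 1.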

  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
  // the remaining string data, so just return length diff (out).
  // The comparison is unsigned for string compression, otherwise signed.
  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
  __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);

  // Extract the characters and calculate the difference.
  if (mirror::kUseStringCompression) {
    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
    __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
    __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
    __ Lsr(out, temp_reg, temp1);                       // Extract first character.
    __ And(temp2, temp2, temp3);
    __ And(out, out, temp3);
  } else {
    __ Bic(temp1, temp1, 0xf);
    __ Lsr(temp2, temp2, temp1);
    __ Lsr(out, temp_reg, temp1);
    __ Movt(temp2, 0);
    __ Movt(out, 0);
  }

  __ Sub(out, out, temp2);
  temps.Release(temp_reg);

  if (mirror::kUseStringCompression) {
    __ B(end);
    __ Bind(different_compression);

    // Comparison for different compression style.
    const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
    DCHECK_EQ(c_char_size, 1u);

    // We want to free up temp3, currently holding `str.count`, for comparison.
    // So, we move it to the bottom bit of the iteration count `temp0` which we then
    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
    __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
    __ Mov(temp1, str);
    __ Mov(temp2, arg);
    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
    {
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                             3 * kMaxInstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
      __ itt(cs);                             // Interleave with selection of temp1 and temp2.
      __ mov(cs, temp1, arg);                 // Preserves flags.
      __ mov(cs, temp2, str);                 // Preserves flags.
    }
    __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.

    // Adjust temp1 and temp2 from string pointers to data pointers.
    __ Add(temp1, temp1, value_offset);
    __ Add(temp2, temp2, value_offset);

    vixl32::Label different_compression_loop;
    vixl32::Label different_compression_diff;

    // Main loop for different compression.
    temp_reg = temps.Acquire();
    __ Bind(&different_compression_loop);
    __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
    __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
    __ Cmp(temp_reg, temp3);
    __ B(ne, &different_compression_diff, /* is_far_target= */ false);
    __ Subs(temp0, temp0, 2);
    __ B(hi, &different_compression_loop, /* is_far_target= */ false);
    __ B(end);

    // Calculate the difference.
    __ Bind(&different_compression_diff);
    __ Sub(out, temp_reg, temp3);
    temps.Release(temp_reg);
    // Flip the difference if the `arg` is compressed.
    // `temp0` contains inverted `str` compression flag, i.e. the same as `arg` compression flag.
    __ Lsrs(temp0, temp0, 1u);
    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                  "Expecting 0=compressed, 1=uncompressed");

    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);
    __ it(cc);
    __ rsb(cc, out, out, 0);
  }
}

// The cut off for unrolling the loop in String.equals() intrinsic for const strings.
// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
// Allow up to 12 instructions (32 bytes) for the unrolled loop.
constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;

static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
  if (candidate->IsLoadString()) {
    HLoadString* load_string = candidate->AsLoadString();
    const DexFile& dex_file = load_string->GetDexFile();
    return dex_file.GetStringDataAndUtf16Length(load_string->GetStringIndex(), utf16_length);
  }
  return nullptr;
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Temporary registers to store lengths of strings and for calculations.
  // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
  locations->AddTemp(LocationFrom(r0));

  // For the generic implementation and for long const strings we need an extra temporary.
  // We do not need it for short const strings, up to 4 bytes, see code generation below.
  uint32_t const_string_length = 0u;
  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
  if (const_string == nullptr) {
    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
  }
  bool is_compressed =
      mirror::kUseStringCompression &&
      const_string != nullptr &&
      mirror::String::DexFileStringAllASCII(const_string, const_string_length);
  if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
    locations->AddTemp(Location::RequiresRegister());
  }

  // TODO: If the String.equals() is used only for an immediately following HIf, we can
  // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
  // Then we shall need an extra temporary register instead of the output register.
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));

  vixl32::Label loop;
  vixl32::Label end;
  vixl32::Label return_true;
  vixl32::Label return_false;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
  }

  // Reference equality check, return true if same reference.
  __ Cmp(str, arg);
  __ B(eq, &return_true, /* is_far_target= */ false);

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // /* HeapReference<Class> */ temp = str->klass_
    __ Ldr(temp, MemOperand(str, class_offset));
    // /* HeapReference<Class> */ out = arg->klass_
    __ Ldr(out, MemOperand(arg, class_offset));
    // Also, because we use the previously loaded class references only in the
    // following comparison, we don't need to unpoison them.
    __ Cmp(temp, out);
    __ B(ne, &return_false, /* is_far_target= */ false);
  }

  // Check if one of the inputs is a const string. Do not special-case both strings
  // being const, such cases should be handled by constant folding if needed.
  uint32_t const_string_length = 0u;
  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
  if (const_string == nullptr) {
    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
    if (const_string != nullptr) {
      std::swap(str, arg);  // Make sure the const string is in `str`.
    }
  }
  bool is_compressed =
      mirror::kUseStringCompression &&
      const_string != nullptr &&
      mirror::String::DexFileStringAllASCII(const_string, const_string_length);

  if (const_string != nullptr) {
    // Load the `count` field of the argument string and check if it matches that of the
    // const string. This also compares the compression style; if it differs, return false.
    __ Ldr(temp, MemOperand(arg, count_offset));
    __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
    __ B(ne, &return_false, /* is_far_target= */ false);
  } else {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp, MemOperand(str, count_offset));
    __ Ldr(out, MemOperand(arg, count_offset));
    // Check if the `count` fields are equal; return false if they're not.
    // This also compares the compression style; if it differs, return false.
    __ Cmp(temp, out);
    __ B(ne, &return_false, /* is_far_target= */ false);
  }

  // Assertions that must hold in order to compare strings 4 bytes at a time.
  // Ok to do this because strings are zero-padded to kObjectAlignment.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");

  if (const_string != nullptr &&
      const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
                                            : kShortConstStringEqualsCutoffInBytes / 2u)) {
    // Load and compare the contents. Though we know the contents of the short const string
    // at compile time, materializing constants may be more code than loading from memory.
    int32_t offset = value_offset;
    size_t remaining_bytes =
        RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
    while (remaining_bytes > sizeof(uint32_t)) {
      vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
      vixl32::Register temp2 = scratch_scope.Acquire();
      __ Ldrd(temp, temp1, MemOperand(str, offset));
      __ Ldrd(temp2, out, MemOperand(arg, offset));
      __ Cmp(temp, temp2);
      __ B(ne, &return_false, /* is_far_target= */ false);
      __ Cmp(temp1, out);
      __ B(ne, &return_false, /* is_far_target= */ false);
      offset += 2u * sizeof(uint32_t);
      remaining_bytes -= 2u * sizeof(uint32_t);
    }
    if (remaining_bytes != 0u) {
      __ Ldr(temp, MemOperand(str, offset));
      __ Ldr(out, MemOperand(arg, offset));
      __ Cmp(temp, out);
      __ B(ne, &return_false, /* is_far_target= */ false);
    }
  } else {
    // Return true if both strings are empty. Even with string compression `count == 0` means empty.
    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                  "Expecting 0=compressed, 1=uncompressed");
    __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);

    if (mirror::kUseStringCompression) {
      // For string compression, calculate the number of bytes to compare (not chars).
      // This could in theory exceed INT32_MAX, so treat temp as unsigned.
      __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                             2 * kMaxInstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
      __ it(cs);                                      // If uncompressed,
      __ add(cs, temp, temp, temp);                   //   double the byte count.
    }

    vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
    UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = scratch_scope.Acquire();

    // Store offset of string value in preparation for comparison loop.
    __ Mov(temp1, value_offset);

    // Loop to compare strings 4 bytes at a time starting at the front of the string.
    __ Bind(&loop);
    __ Ldr(out, MemOperand(str, temp1));
    __ Ldr(temp2, MemOperand(arg, temp1));
    __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
    __ Cmp(out, temp2);
    __ B(ne, &return_false, /* is_far_target= */ false);
    // With string compression, we have compared 4 bytes, otherwise 2 chars.
    __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
    __ B(hi, &loop, /* is_far_target= */ false);
  }

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ Mov(out, 1);
  __ B(final_label);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ Mov(out, 0);

  if (end.IsReferenced()) {
    __ Bind(&end);
  }
}

static void GenerateVisitStringIndexOf(HInvoke* invoke,
                                       ArmVIXLAssembler* assembler,
                                       CodeGeneratorARMVIXL* codegen,
                                       bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
      codegen->AddSlowPath(slow_path);
      __ B(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != DataType::Type::kUint16) {
    vixl32::Register char_reg = InputRegisterAt(invoke, 1);
    // 0xffff is not a modified immediate but 0x10000 is; use `>= 0x10000` instead of `> 0xffff`.
1072     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1073     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1074     codegen->AddSlowPath(slow_path);
1075     __ B(hs, slow_path->GetEntryLabel());
1076   }
1077 
1078   if (start_at_zero) {
1079     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1080     DCHECK(tmp_reg.Is(r2));
1081     // Start-index = 0.
1082     __ Mov(tmp_reg, 0);
1083   }
1084 
1085   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1086   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1087 
1088   if (slow_path != nullptr) {
1089     __ Bind(slow_path->GetExitLabel());
1090   }
1091 }
1092 
VisitStringIndexOf(HInvoke * invoke)1093 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1094   LocationSummary* locations = new (allocator_) LocationSummary(
1095       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1096   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1097   // best to align the inputs accordingly.
1098   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1099   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1100   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1101   locations->SetOut(LocationFrom(r0));
1102 
1103   // Need to send start-index=0.
1104   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1105 }
1106 
VisitStringIndexOf(HInvoke * invoke)1107 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1108   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1109 }
1110 
VisitStringIndexOfAfter(HInvoke * invoke)1111 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1112   LocationSummary* locations = new (allocator_) LocationSummary(
1113       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1114   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1115   // best to align the inputs accordingly.
1116   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1117   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1118   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1119   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1120   locations->SetOut(LocationFrom(r0));
1121 }
1122 
VisitStringIndexOfAfter(HInvoke * invoke)1123 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1124   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1125 }
1126 
VisitStringNewStringFromBytes(HInvoke * invoke)1127 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1128   LocationSummary* locations = new (allocator_) LocationSummary(
1129       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1130   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1131   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1132   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1133   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1134   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1135   locations->SetOut(LocationFrom(r0));
1136 }
1137 
VisitStringNewStringFromBytes(HInvoke * invoke)1138 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1139   ArmVIXLAssembler* assembler = GetAssembler();
1140   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1141   __ Cmp(byte_array, 0);
1142   SlowPathCodeARMVIXL* slow_path =
1143       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1144   codegen_->AddSlowPath(slow_path);
1145   __ B(eq, slow_path->GetEntryLabel());
1146 
1147   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1148   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1149   __ Bind(slow_path->GetExitLabel());
1150 }
1151 
1152 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1153   LocationSummary* locations =
1154       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1155   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1156   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1157   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1158   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1159   locations->SetOut(LocationFrom(r0));
1160 }
1161 
1162 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1163   // No need to emit code checking whether `locations->InAt(2)` is a null
1164   // pointer, as callers of the native method
1165   //
1166   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1167   //
1168   // all include a null check on `data` before calling that method.
1169   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1170   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1171 }
1172 
1173 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1174   LocationSummary* locations = new (allocator_) LocationSummary(
1175       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1176   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1177   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1178   locations->SetOut(LocationFrom(r0));
1179 }
1180 
1181 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1182   ArmVIXLAssembler* assembler = GetAssembler();
1183   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1184   __ Cmp(string_to_copy, 0);
1185   SlowPathCodeARMVIXL* slow_path =
1186       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1187   codegen_->AddSlowPath(slow_path);
1188   __ B(eq, slow_path->GetEntryLabel());
1189 
1190   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1191   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1192 
1193   __ Bind(slow_path->GetExitLabel());
1194 }
1195 
1196 static void GenArrayAddress(ArmVIXLAssembler* assembler,
1197                             vixl32::Register dest,
1198                             vixl32::Register base,
1199                             Location pos,
1200                             DataType::Type type,
1201                             int32_t data_offset) {
1202   if (pos.IsConstant()) {
1203     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
1204     __ Add(dest, base, static_cast<int32_t>(DataType::Size(type)) * constant + data_offset);
1205   } else {
1206     if (data_offset != 0) {
1207       __ Add(dest, base, data_offset);
1208       base = dest;
1209     }
1210     __ Add(dest, base, Operand(RegisterFrom(pos), LSL, DataType::SizeShift(type)));
1211   }
1212 }
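// A worked example of the constant branch above (illustrative values, assuming a reference array
// with 4-byte elements and a 12-byte data offset): for pos == 3,
//   dest = base + 4 * 3 + 12 = base + 24,
// i.e. the address of element 3. The register branch computes the same address by folding in the
// data offset first and then adding `pos << SizeShift(type)`.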
1213 
1214 static Location LocationForSystemArrayCopyInput(ArmVIXLAssembler* assembler, HInstruction* input) {
1215   HIntConstant* const_input = input->AsIntConstantOrNull();
1216   if (const_input != nullptr && assembler->ShifterOperandCanAlwaysHold(const_input->GetValue())) {
1217     return Location::ConstantLocation(const_input);
1218   } else {
1219     return Location::RequiresRegister();
1220   }
1221 }
1222 
1223 // We choose to use the native implementation for longer copy lengths.
1224 static constexpr int32_t kSystemArrayCopyThreshold = 128;
1225 
1226 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1227   // The only read barrier implementation supporting the
1228   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1229   if (codegen_->EmitNonBakerReadBarrier()) {
1230     return;
1231   }
1232 
1233   constexpr size_t kInitialNumTemps = 3u;  // We need at least three temps.
1234   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
1235       invoke, kSystemArrayCopyThreshold, kInitialNumTemps);
1236   if (locations != nullptr) {
1237     locations->SetInAt(1, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(1)));
1238     locations->SetInAt(3, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(3)));
1239     locations->SetInAt(4, LocationForSystemArrayCopyInput(assembler_, invoke->InputAt(4)));
1240     if (codegen_->EmitBakerReadBarrier()) {
1241       // Temporary register IP cannot be used in
1242       // ReadBarrierSystemArrayCopySlowPathARMVIXL (because that register
1243       // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1244       // temporary register from the register allocator.
1245       locations->AddTemp(Location::RequiresRegister());
1246     }
1247   }
1248 }
1249 
1250 static void CheckSystemArrayCopyPosition(ArmVIXLAssembler* assembler,
1251                                          vixl32::Register array,
1252                                          Location pos,
1253                                          Location length,
1254                                          SlowPathCodeARMVIXL* slow_path,
1255                                          vixl32::Register temp,
1256                                          bool length_is_array_length,
1257                                          bool position_sign_checked) {
1258   // Where is the length in the Array?
1259   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1260 
1261   if (pos.IsConstant()) {
1262     int32_t pos_const = Int32ConstantFrom(pos);
1263     if (pos_const == 0) {
1264       if (!length_is_array_length) {
1265         // Check that length(array) >= length.
1266         __ Ldr(temp, MemOperand(array, length_offset));
1267         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1268         __ B(lt, slow_path->GetEntryLabel());
1269       }
1270     } else {
1271       // Calculate length(array) - pos.
1272       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1273       // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1274       __ Ldr(temp, MemOperand(array, length_offset));
1275       __ Sub(temp, temp, pos_const);
1276 
1277       // Check that (length(array) - pos) >= length.
1278       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1279       __ B(lt, slow_path->GetEntryLabel());
1280     }
1281   } else if (length_is_array_length) {
1282     // The only way the copy can succeed is if pos is zero.
1283     vixl32::Register pos_reg = RegisterFrom(pos);
1284     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1285   } else {
1286     // Check that pos >= 0.
1287     vixl32::Register pos_reg = RegisterFrom(pos);
1288     if (!position_sign_checked) {
1289       __ Cmp(pos_reg, 0);
1290       __ B(lt, slow_path->GetEntryLabel());
1291     }
1292 
1293     // Calculate length(array) - pos.
1294     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
1295     // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
1296     __ Ldr(temp, MemOperand(array, length_offset));
1297     __ Sub(temp, temp, pos_reg);
1298 
1299     // Check that (length(array) - pos) >= length.
1300     __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
1301     __ B(lt, slow_path->GetEntryLabel());
1302   }
1303 }
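// Illustrative numbers for the checks above: with length(array) == 10, pos == 3 and length == 5,
// temp becomes 10 - 3 == 7 and the copy proceeds since 7 >= 5; with length == 8 instead, the
// comparison sees 7 < 8 and branches to the slow path.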
1304 
1305 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1306   // The only read barrier implementation supporting the
1307   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1308   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
1309 
1310   ArmVIXLAssembler* assembler = GetAssembler();
1311   LocationSummary* locations = invoke->GetLocations();
1312 
1313   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1314   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1315   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1316   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1317   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1318 
1319   vixl32::Register src = InputRegisterAt(invoke, 0);
1320   Location src_pos = locations->InAt(1);
1321   vixl32::Register dest = InputRegisterAt(invoke, 2);
1322   Location dest_pos = locations->InAt(3);
1323   Location length = locations->InAt(4);
1324   Location temp1_loc = locations->GetTemp(0);
1325   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1326   Location temp2_loc = locations->GetTemp(1);
1327   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1328   Location temp3_loc = locations->GetTemp(2);
1329   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1330 
1331   SlowPathCodeARMVIXL* intrinsic_slow_path =
1332       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1333   codegen_->AddSlowPath(intrinsic_slow_path);
1334 
1335   vixl32::Label conditions_on_positions_validated;
1336   SystemArrayCopyOptimizations optimizations(invoke);
1337 
1338   // If source and destination are the same array, we go to the slow path when dest_pos > src_pos,
1339   // since the loop below only copies forwards. No check is needed if the two positions are the same.
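  // For example (hypothetical values): with src == dest, src_pos == 0, dest_pos == 2 and
  // length == 4, a forward element-by-element copy would overwrite elements 2 and 3 before
  // reading them, so only the runtime call handles that case correctly.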
1340   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
1341     if (src_pos.IsConstant()) {
1342       int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1343       if (dest_pos.IsConstant()) {
1344         int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1345         if (optimizations.GetDestinationIsSource()) {
1346           // Checked when building locations.
1347           DCHECK_GE(src_pos_constant, dest_pos_constant);
1348         } else if (src_pos_constant < dest_pos_constant) {
1349           __ Cmp(src, dest);
1350           __ B(eq, intrinsic_slow_path->GetEntryLabel());
1351         }
1352       } else {
1353         if (!optimizations.GetDestinationIsSource()) {
1354           __ Cmp(src, dest);
1355           __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1356         }
1357         __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1358         __ B(gt, intrinsic_slow_path->GetEntryLabel());
1359       }
1360     } else {
1361       if (!optimizations.GetDestinationIsSource()) {
1362         __ Cmp(src, dest);
1363         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1364       }
1365       __ Cmp(RegisterFrom(src_pos), OperandFrom(dest_pos, DataType::Type::kInt32));
1366       __ B(lt, intrinsic_slow_path->GetEntryLabel());
1367     }
1368   }
1369 
1370   __ Bind(&conditions_on_positions_validated);
1371 
1372   if (!optimizations.GetSourceIsNotNull()) {
1373     // Bail out if the source is null.
1374     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1375   }
1376 
1377   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1378     // Bail out if the destination is null.
1379     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1380   }
1381 
1382   // We have already checked in the LocationsBuilder for the constant case.
1383   if (!length.IsConstant()) {
1384     // Merge the following two comparisons into one:
1385     //   If the length is negative, bail out (delegate to libcore's native implementation).
1386     //   If the length is >= 128, (currently) prefer the native implementation.
    //   A single unsigned comparison covers both: a negative length is a large unsigned value, so HS catches it too.
1387     __ Cmp(RegisterFrom(length), kSystemArrayCopyThreshold);
1388     __ B(hs, intrinsic_slow_path->GetEntryLabel());
1389   }
1390 
1391   // Validity checks: source.
1392   CheckSystemArrayCopyPosition(assembler,
1393                                src,
1394                                src_pos,
1395                                length,
1396                                intrinsic_slow_path,
1397                                temp1,
1398                                optimizations.GetCountIsSourceLength(),
1399                                /*position_sign_checked=*/ false);
1400 
1401   // Validity checks: dest.
1402   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1403   CheckSystemArrayCopyPosition(assembler,
1404                                dest,
1405                                dest_pos,
1406                                length,
1407                                intrinsic_slow_path,
1408                                temp1,
1409                                optimizations.GetCountIsDestinationLength(),
1410                                dest_position_sign_checked);
1411 
1412   auto check_non_primitive_array_class = [&](vixl32::Register klass, vixl32::Register temp) {
1413     // No read barrier is needed for reading a chain of constant references for comparing
1414     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1415     // /* HeapReference<Class> */ temp = klass->component_type_
1416     __ Ldr(temp, MemOperand(klass, component_offset));
1417     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp);
1418     // Check that the component type is not null.
1419     __ CompareAndBranchIfZero(temp, intrinsic_slow_path->GetEntryLabel());
1420     // Check that the component type is not a primitive.
1421     // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
1422     __ Ldrh(temp, MemOperand(temp, primitive_offset));
1423     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1424     __ CompareAndBranchIfNonZero(temp, intrinsic_slow_path->GetEntryLabel());
1425   };
1426 
1427   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1428     // Check whether all elements of the source array are assignable to the component
1429     // type of the destination array. We do two checks: the classes are the same,
1430     // or the destination is Object[]. If none of these checks succeed, we go to the
1431     // slow path.
1432 
1433     if (codegen_->EmitBakerReadBarrier()) {
1434       // /* HeapReference<Class> */ temp1 = dest->klass_
1435       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1436           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1437       // Register `temp1` is not trashed by the read barrier emitted
1438       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1439       // method produces a call to a ReadBarrierMarkRegX entry point,
1440       // which saves all potentially live registers, including
1441       // temporaries such as `temp1`.
1442       // /* HeapReference<Class> */ temp2 = src->klass_
1443       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1444           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1445     } else {
1446       // /* HeapReference<Class> */ temp1 = dest->klass_
1447       __ Ldr(temp1, MemOperand(dest, class_offset));
1448       assembler->MaybeUnpoisonHeapReference(temp1);
1449       // /* HeapReference<Class> */ temp2 = src->klass_
1450       __ Ldr(temp2, MemOperand(src, class_offset));
1451       assembler->MaybeUnpoisonHeapReference(temp2);
1452     }
1453 
1454     __ Cmp(temp1, temp2);
1455     if (optimizations.GetDestinationIsTypedObjectArray()) {
1456       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1457       vixl32::Label do_copy;
1458       // For class match, we can skip the source type check regardless of the optimization flag.
1459       __ B(eq, &do_copy, /* is_far_target= */ false);
1460       // No read barrier is needed for reading a chain of constant references
1461       // for comparing with null, see `ReadBarrierOption`.
1462       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1463       __ Ldr(temp1, MemOperand(temp1, component_offset));
1464       assembler->MaybeUnpoisonHeapReference(temp1);
1465       // /* HeapReference<Class> */ temp1 = temp1->super_class_
1466       __ Ldr(temp1, MemOperand(temp1, super_offset));
1467       // No need to unpoison the result, we're comparing against null.
1468       __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1469       // Bail out if the source is not a non primitive array.
1470       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1471         check_non_primitive_array_class(temp2, temp2);
1472       }
1473       __ Bind(&do_copy);
1474     } else {
1475       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1476       // For class match, we can skip the array type check completely if at least one of source
1477       // and destination is known to be a non primitive array, otherwise one check is enough.
1478       __ B(ne, intrinsic_slow_path->GetEntryLabel());
1479       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1480           !optimizations.GetSourceIsNonPrimitiveArray()) {
1481         check_non_primitive_array_class(temp2, temp2);
1482       }
1483     }
1484   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1485     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1486     // Bail out if the source is not a non primitive array.
1487     // No read barrier is needed for reading a chain of constant references for comparing
1488     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1489     // /* HeapReference<Class> */ temp2 = src->klass_
1490     __ Ldr(temp2, MemOperand(src, class_offset));
1491     assembler->MaybeUnpoisonHeapReference(temp2);
1492     check_non_primitive_array_class(temp2, temp2);
1493   }
1494 
1495   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1496     // Zero constant length: no need to emit the loop code at all.
1497   } else {
1498     vixl32::Label skip_copy_and_write_barrier;
1499     if (length.IsRegister()) {
1500       // Don't enter the copy loop if the length is zero.
1501       __ CompareAndBranchIfZero(
1502           RegisterFrom(length), &skip_copy_and_write_barrier, /* is_far_target= */ false);
1503     }
1504 
1505     const DataType::Type type = DataType::Type::kReference;
1506     const int32_t element_size = DataType::Size(type);
1507     const int32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1508 
1509     SlowPathCodeARMVIXL* read_barrier_slow_path = nullptr;
1510     vixl32::Register rb_tmp;
1511     bool emit_rb = codegen_->EmitBakerReadBarrier();
1512     if (emit_rb) {
1513       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1514 
1515       // SystemArrayCopy implementation for Baker read barriers (see
1516       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1517       //
1518       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1519       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1520       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1521       //   if (is_gray) {
1522       //     // Slow-path copy.
1523       //     do {
1524       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1525       //     } while (src_ptr != end_ptr)
1526       //   } else {
1527       //     // Fast-path copy.
1528       //     do {
1529       //       *dest_ptr++ = *src_ptr++;
1530       //     } while (src_ptr != end_ptr)
1531       //   }
1532 
1533       // /* int32_t */ monitor = src->monitor_
1534       rb_tmp = RegisterFrom(locations->GetTemp(3));
1535       __ Ldr(rb_tmp, MemOperand(src, monitor_offset));
1536       // /* LockWord */ lock_word = LockWord(monitor)
1537       static_assert(sizeof(LockWord) == sizeof(int32_t),
1538                     "art::LockWord and int32_t have different sizes.");
1539 
1540       // Introduce a dependency on the lock_word including the rb_state,
1541       // which shall prevent load-load reordering without using
1542       // a memory barrier (which would be more expensive).
1543       // `src` is unchanged by this operation, but its value now depends
1544       // on `rb_tmp`.
1545       __ Add(src, src, Operand(rb_tmp, vixl32::LSR, 32));
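      // (Note: on AArch32, an LSR by 32 of a 32-bit register yields 0, so this ADD contributes
      // nothing to `src` arithmetically; it only establishes the register dependency described
      // above.)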
1546 
1547       // Slow path used to copy array when `src` is gray.
1548       // Note that the base destination address is computed in `temp2`
1549       // by the slow path code.
1550       read_barrier_slow_path =
1551           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1552       codegen_->AddSlowPath(read_barrier_slow_path);
1553     }
1554 
1555     // Compute the base source address in `temp1`.
1556     // Note that for read barrier, `temp1` (the base source address) is computed from `src`
1557     // (and `src_pos`) here, and thus honors the artificial dependency of `src` on `rb_tmp`.
1558     GenArrayAddress(GetAssembler(), temp1, src, src_pos, type, data_offset);
1559     // Compute the base destination address in `temp2`.
1560     GenArrayAddress(GetAssembler(), temp2, dest, dest_pos, type, data_offset);
1561     // Compute the end source address in `temp3`.
1562     GenArrayAddress(GetAssembler(), temp3, temp1, length, type, /*data_offset=*/ 0);
1563 
1564     if (emit_rb) {
1565       // Given the numeric representation, it's enough to check the low bit of the
1566       // rb_state. We do that by shifting the bit out of the lock word with LSRS
1567       // which can be a 16-bit instruction unlike the TST immediate.
1568       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1569       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1570       DCHECK(rb_tmp.IsValid());
1571       __ Lsrs(rb_tmp, rb_tmp, LockWord::kReadBarrierStateShift + 1);
1572       // Carry flag is the last bit shifted out by LSRS.
1573       __ B(cs, read_barrier_slow_path->GetEntryLabel());
1574     }
1575 
1576     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1577     // poison/unpoison.
1578     vixl32::Label loop;
1579     __ Bind(&loop);
1580     {
1581       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1582       const vixl32::Register temp_reg = temps.Acquire();
1583       __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1584       __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1585     }
1586     __ Cmp(temp1, temp3);
1587     __ B(ne, &loop, /* is_far_target= */ false);
1588 
1589     if (emit_rb) {
1590       DCHECK(read_barrier_slow_path != nullptr);
1591       __ Bind(read_barrier_slow_path->GetExitLabel());
1592     }
1593 
1594     // We only need one card marking on the destination array.
1595     codegen_->MarkGCCard(temp1, temp2, dest);
1596 
1597     __ Bind(&skip_copy_and_write_barrier);
1598   }
1599 
1600   __ Bind(intrinsic_slow_path->GetExitLabel());
1601 }
1602 
1603 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1604   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1605   // the code generator. Furthermore, the register allocator creates fixed live intervals
1606   // for all caller-saved registers because we are doing a function call. As a result, if
1607   // the input and output locations are unallocated, the register allocator runs out of
1608   // registers and fails; however, a debuggable graph is not the common case.
1609   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1610     return;
1611   }
1612 
1613   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1614   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1615   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1616 
1617   LocationSummary* const locations =
1618       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1619   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1620 
1621   locations->SetInAt(0, Location::RequiresFpuRegister());
1622   locations->SetOut(Location::RequiresFpuRegister());
1623   // Native code uses the soft float ABI.
1624   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1625   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1626 }
1627 
1628 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1629   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1630   // the code generator. Furthermore, the register allocator creates fixed live intervals
1631   // for all caller-saved registers because we are doing a function call. As a result, if
1632   // the input and output locations are unallocated, the register allocator runs out of
1633   // registers and fails; however, a debuggable graph is not the common case.
1634   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1635     return;
1636   }
1637 
1638   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1639   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1640   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1641   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1642 
1643   LocationSummary* const locations =
1644       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1645   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1646 
1647   locations->SetInAt(0, Location::RequiresFpuRegister());
1648   locations->SetInAt(1, Location::RequiresFpuRegister());
1649   locations->SetOut(Location::RequiresFpuRegister());
1650   // Native code uses the soft float ABI.
1651   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1652   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1653   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1654   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1655 }
1656 
1657 static void GenFPToFPCall(HInvoke* invoke,
1658                           ArmVIXLAssembler* assembler,
1659                           CodeGeneratorARMVIXL* codegen,
1660                           QuickEntrypointEnum entry) {
1661   LocationSummary* const locations = invoke->GetLocations();
1662 
1663   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1664   DCHECK(locations->WillCall() && locations->Intrinsified());
1665 
1666   // Native code uses the soft float ABI.
1667   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1668           RegisterFrom(locations->GetTemp(1)),
1669           InputDRegisterAt(invoke, 0));
1670   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1671   __ Vmov(OutputDRegister(invoke),
1672           RegisterFrom(locations->GetTemp(0)),
1673           RegisterFrom(locations->GetTemp(1)));
1674 }
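// A sketch of what the VMOVs above are doing, assuming the AAPCS soft-float convention used by
// these runtime entry points: the 64-bit argument is split into the core register pair
// GetTemp(0)/GetTemp(1) (low word / high word, likely r0/r1 here), the call is made, and the
// 64-bit result is reassembled from the same pair into the output D register.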
1675 
1676 static void GenFPFPToFPCall(HInvoke* invoke,
1677                             ArmVIXLAssembler* assembler,
1678                             CodeGeneratorARMVIXL* codegen,
1679                             QuickEntrypointEnum entry) {
1680   LocationSummary* const locations = invoke->GetLocations();
1681 
1682   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1683   DCHECK(locations->WillCall() && locations->Intrinsified());
1684 
1685   // Native code uses the soft float ABI.
1686   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1687           RegisterFrom(locations->GetTemp(1)),
1688           InputDRegisterAt(invoke, 0));
1689   __ Vmov(RegisterFrom(locations->GetTemp(2)),
1690           RegisterFrom(locations->GetTemp(3)),
1691           InputDRegisterAt(invoke, 1));
1692   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1693   __ Vmov(OutputDRegister(invoke),
1694           RegisterFrom(locations->GetTemp(0)),
1695           RegisterFrom(locations->GetTemp(1)));
1696 }
1697 
1698 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1699   CreateFPToFPCallLocations(allocator_, invoke);
1700 }
1701 
1702 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1703   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1704 }
1705 
1706 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1707   CreateFPToFPCallLocations(allocator_, invoke);
1708 }
1709 
1710 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1711   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1712 }
1713 
1714 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1715   CreateFPToFPCallLocations(allocator_, invoke);
1716 }
1717 
1718 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1719   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1720 }
1721 
1722 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1723   CreateFPToFPCallLocations(allocator_, invoke);
1724 }
1725 
1726 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1727   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1728 }
1729 
1730 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1731   CreateFPToFPCallLocations(allocator_, invoke);
1732 }
1733 
1734 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1735   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1736 }
1737 
1738 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1739   CreateFPToFPCallLocations(allocator_, invoke);
1740 }
1741 
1742 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1743   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1744 }
1745 
1746 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1747   CreateFPToFPCallLocations(allocator_, invoke);
1748 }
1749 
1750 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1751   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1752 }
1753 
1754 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1755   CreateFPToFPCallLocations(allocator_, invoke);
1756 }
1757 
1758 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1759   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1760 }
1761 
1762 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1763   CreateFPToFPCallLocations(allocator_, invoke);
1764 }
1765 
1766 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1767   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1768 }
1769 
1770 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1771   CreateFPToFPCallLocations(allocator_, invoke);
1772 }
1773 
1774 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1775   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1776 }
1777 
1778 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1779   CreateFPToFPCallLocations(allocator_, invoke);
1780 }
1781 
1782 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1783   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1784 }
1785 
1786 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1787   CreateFPToFPCallLocations(allocator_, invoke);
1788 }
1789 
1790 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1791   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1792 }
1793 
1794 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1795   CreateFPToFPCallLocations(allocator_, invoke);
1796 }
1797 
1798 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1799   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1800 }
1801 
1802 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1803   CreateFPToFPCallLocations(allocator_, invoke);
1804 }
1805 
1806 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1807   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1808 }
1809 
1810 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1811   CreateFPFPToFPCallLocations(allocator_, invoke);
1812 }
1813 
1814 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1815   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1816 }
1817 
1818 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1819   CreateFPFPToFPCallLocations(allocator_, invoke);
1820 }
1821 
1822 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1823   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1824 }
1825 
1826 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1827   CreateFPFPToFPCallLocations(allocator_, invoke);
1828 }
1829 
1830 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1831   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1832 }
1833 
1834 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1835   CreateFPFPToFPCallLocations(allocator_, invoke);
1836 }
1837 
1838 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1839   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1840 }
1841 
1842 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1843   CreateIntToIntLocations(allocator_, invoke);
1844 }
1845 
1846 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1847   ArmVIXLAssembler* assembler = GetAssembler();
1848   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1849 }
1850 
1851 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1852   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1853 }
1854 
1855 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1856   ArmVIXLAssembler* assembler = GetAssembler();
1857   LocationSummary* locations = invoke->GetLocations();
1858 
1859   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
1860   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
1861   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1862   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1863 
1864   __ Rbit(out_reg_lo, in_reg_hi);
1865   __ Rbit(out_reg_hi, in_reg_lo);
1866 }
1867 
1868 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1869   DCHECK(pair.IsRegisterPair());
1870   __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1871   __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
1872 }
1873 
1874 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
1875                                  DataType::Type type,
1876                                  Location in,
1877                                  Location out) {
1878   switch (type) {
1879     case DataType::Type::kUint16:
1880       __ Rev16(RegisterFrom(out), RegisterFrom(in));
1881       break;
1882     case DataType::Type::kInt16:
1883       __ Revsh(RegisterFrom(out), RegisterFrom(in));
1884       break;
1885     case DataType::Type::kInt32:
1886       __ Rev(RegisterFrom(out), RegisterFrom(in));
1887       break;
1888     case DataType::Type::kInt64:
1889       DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
1890       __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
1891       __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
1892       break;
1893     case DataType::Type::kFloat32:
1894       __ Rev(RegisterFrom(in), RegisterFrom(in));  // Note: Clobbers `in`.
1895       __ Vmov(SRegisterFrom(out), RegisterFrom(in));
1896       break;
1897     case DataType::Type::kFloat64:
1898       GenerateReverseBytesInPlaceForEachWord(assembler, in);  // Note: Clobbers `in`.
1899       __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in));  // Swap high/low.
1900       break;
1901     default:
1902       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
1903       UNREACHABLE();
1904   }
1905 }
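// Example for the kInt64 case, using an illustrative value: reversing the bytes of
// 0x0011223344556677 (hi = 0x00112233, lo = 0x44556677) yields out_lo = REV(hi) = 0x33221100 and
// out_hi = REV(lo) = 0x77665544, i.e. 0x7766554433221100 as a 64-bit value.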
1906 
1907 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1908   CreateIntToIntLocations(allocator_, invoke);
1909 }
1910 
1911 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
1912   ArmVIXLAssembler* assembler = GetAssembler();
1913   LocationSummary* locations = invoke->GetLocations();
1914   GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
1915 }
1916 
1917 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1918   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1919 }
1920 
1921 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
1922   ArmVIXLAssembler* assembler = GetAssembler();
1923   LocationSummary* locations = invoke->GetLocations();
1924   GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
1925 }
1926 
1927 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1928   CreateIntToIntLocations(allocator_, invoke);
1929 }
1930 
1931 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
1932   ArmVIXLAssembler* assembler = GetAssembler();
1933   LocationSummary* locations = invoke->GetLocations();
1934   GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
1935 }
1936 
1937 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
1938   DCHECK(DataType::IsIntOrLongType(type)) << type;
1939   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
1940   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
1941 
1942   bool is_long = type == DataType::Type::kInt64;
1943   LocationSummary* locations = instr->GetLocations();
1944   Location in = locations->InAt(0);
1945   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
1946   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
1947   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
1948   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
1949   vixl32::Register  out_r = OutputRegister(instr);
1950 
1951   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
1952   // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
1953   // transferring data from a core reg to the upper or lower half of a VFP D-reg incurs extra latency.
1954   // That's why for the integer bit count we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
1955   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
1956   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
1957   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
1958   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
1959   if (is_long) {
1960     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
1961   }
1962   __ Vmov(out_r, tmp_s);
1963 }
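// Walking the int case above with an illustrative input of 0x000000F0: VCNT gives per-byte counts
// {4, 0, 0, 0} in the low word, the two VPADDLs accumulate them into a single word value of 4, and
// the final VMOV returns Integer.bitCount(0xF0) == 4. The extra VPADDL in the long case folds the
// per-word counts into one 64-bit total.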
1964 
1965 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1966   CreateIntToIntLocations(allocator_, invoke);
1967   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1968 }
1969 
1970 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
1971   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
1972 }
1973 
1974 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1975   VisitIntegerBitCount(invoke);
1976 }
1977 
1978 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
1979   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
1980 }
1981 
1982 static void GenHighestOneBit(HInvoke* invoke,
1983                              DataType::Type type,
1984                              CodeGeneratorARMVIXL* codegen) {
1985   DCHECK(DataType::IsIntOrLongType(type));
1986 
1987   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1988   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1989   const vixl32::Register temp = temps.Acquire();
1990 
1991   if (type == DataType::Type::kInt64) {
1992     LocationSummary* locations = invoke->GetLocations();
1993     Location in = locations->InAt(0);
1994     Location out = locations->Out();
1995 
1996     vixl32::Register in_reg_lo = LowRegisterFrom(in);
1997     vixl32::Register in_reg_hi = HighRegisterFrom(in);
1998     vixl32::Register out_reg_lo = LowRegisterFrom(out);
1999     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2000 
2001     __ Mov(temp, 0x80000000);  // Modified immediate.
2002     __ Clz(out_reg_lo, in_reg_lo);
2003     __ Clz(out_reg_hi, in_reg_hi);
2004     __ Lsr(out_reg_lo, temp, out_reg_lo);
2005     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2006 
2007     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2008     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2009     // we check that the output is in a low register, so that a 16-bit MOV
2010     // encoding can be used. If output is in a high register, then we generate
2011     // 4 more bytes of code to avoid a branch.
2012     Operand mov_src(0);
2013     if (!out_reg_lo.IsLow()) {
2014       __ Mov(LeaveFlags, temp, 0);
2015       mov_src = Operand(temp);
2016     }
2017     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2018                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2019                                   CodeBufferCheckScope::kExactSize);
2020     __ it(ne);
2021     __ mov(ne, out_reg_lo, mov_src);
2022   } else {
2023     vixl32::Register out = OutputRegister(invoke);
2024     vixl32::Register in = InputRegisterAt(invoke, 0);
2025 
2026     __ Mov(temp, 0x80000000);  // Modified immediate.
2027     __ Clz(out, in);
2028     __ Lsr(out, temp, out);
2029   }
2030 }
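// The computation above is 0x80000000 >> CLZ(x); for an illustrative x == 0x00012345, CLZ is 15
// and 0x80000000 >> 15 == 0x00010000, the highest set bit. In the long case, the LSRS sets the
// flags and the IT block clears the low-word result whenever the high word contributed a bit.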
2031 
2032 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2033   CreateIntToIntLocations(allocator_, invoke);
2034 }
2035 
2036 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2037   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2038 }
2039 
2040 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2041   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2042 }
2043 
2044 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2045   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2046 }
2047 
2048 static void GenLowestOneBit(HInvoke* invoke,
2049                             DataType::Type type,
2050                             CodeGeneratorARMVIXL* codegen) {
2051   DCHECK(DataType::IsIntOrLongType(type));
2052 
2053   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2054   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2055   const vixl32::Register temp = temps.Acquire();
2056 
2057   if (type == DataType::Type::kInt64) {
2058     LocationSummary* locations = invoke->GetLocations();
2059     Location in = locations->InAt(0);
2060     Location out = locations->Out();
2061 
2062     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2063     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2064     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2065     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2066 
2067     __ Rsb(out_reg_hi, in_reg_hi, 0);
2068     __ Rsb(out_reg_lo, in_reg_lo, 0);
2069     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2070     // ANDS also sets the flags: the result is 0 (Z set) iff in_reg_lo is 0.
2071     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2072 
2073     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2074     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2075     // we check that the output is in a low register, so that a 16-bit MOV
2076     // encoding can be used. If output is in a high register, then we generate
2077     // 4 more bytes of code to avoid a branch.
2078     Operand mov_src(0);
2079     if (!out_reg_lo.IsLow()) {
2080       __ Mov(LeaveFlags, temp, 0);
2081       mov_src = Operand(temp);
2082     }
2083     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2084                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2085                                   CodeBufferCheckScope::kExactSize);
2086     __ it(ne);
2087     __ mov(ne, out_reg_hi, mov_src);
2088   } else {
2089     vixl32::Register out = OutputRegister(invoke);
2090     vixl32::Register in = InputRegisterAt(invoke, 0);
2091 
2092     __ Rsb(temp, in, 0);
2093     __ And(out, temp, in);
2094   }
2095 }
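// This is the classic x & -x idiom: negation flips every bit above the lowest set bit, so the AND
// isolates it. For an illustrative x == 0x00012340, -x == 0xFFFEDCC0 and x & -x == 0x00000040.
// In the long case, the flags from ANDS let the IT block zero the high-word result whenever the
// low word already contains the lowest set bit.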
2096 
2097 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2098   CreateIntToIntLocations(allocator_, invoke);
2099 }
2100 
2101 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2102   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2103 }
2104 
2105 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2106   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2107 }
2108 
2109 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2110   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2111 }
2112 
2113 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2114   LocationSummary* locations =
2115       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2116   locations->SetInAt(0, Location::RequiresRegister());
2117   locations->SetInAt(1, Location::RequiresRegister());
2118   locations->SetInAt(2, Location::RequiresRegister());
2119   locations->SetInAt(3, Location::RequiresRegister());
2120   locations->SetInAt(4, Location::RequiresRegister());
2121 
2122   // Temporary registers to store lengths of strings and for calculations.
2123   locations->AddTemp(Location::RequiresRegister());
2124   locations->AddTemp(Location::RequiresRegister());
2125   locations->AddTemp(Location::RequiresRegister());
2126 }
2127 
2128 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2129   ArmVIXLAssembler* assembler = GetAssembler();
2130   LocationSummary* locations = invoke->GetLocations();
2131 
2132   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2133   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2134   DCHECK_EQ(char_size, 2u);
2135 
2136   // Location of data in char array buffer.
2137   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2138 
2139   // Location of char array data in string.
2140   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2141 
2142   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2143   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2144   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2145   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2146   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2147   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2148   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2149 
2150   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2151   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2152   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2153 
2154   vixl32::Label done, compressed_string_loop;
2155   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2156   // dst to be copied.
2157   __ Add(dst_ptr, dstObj, data_offset);
2158   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2159 
2160   __ Subs(num_chr, srcEnd, srcBegin);
2161   // Early out for valid zero-length retrievals.
2162   __ B(eq, final_label, /* is_far_target= */ false);
2163 
2164   // src range to copy.
2165   __ Add(src_ptr, srcObj, value_offset);
2166 
2167   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2168   vixl32::Register temp;
2169   vixl32::Label compressed_string_preloop;
2170   if (mirror::kUseStringCompression) {
2171     // Location of count in string.
2172     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2173     temp = temps.Acquire();
2174     // String's length.
2175     __ Ldr(temp, MemOperand(srcObj, count_offset));
2176     __ Tst(temp, 1);
2177     temps.Release(temp);
2178     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2179   }
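  // (The TST above assumes ART's compressed-string encoding, where the low bit of the `count`
  // field is the compression flag: 0 means the characters are stored as 8-bit values and 1 means
  // uncompressed 16-bit values, which is why a clear bit 0 routes to the compressed preloop.)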
2180   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2181 
2182   // Do the copy.
2183   vixl32::Label loop, remainder;
2184 
2185   temp = temps.Acquire();
2186   // Subtract into a scratch register to save repairing the value of num_chr on the < 4 character path.
2187   __ Subs(temp, num_chr, 4);
2188   __ B(lt, &remainder, /* is_far_target= */ false);
2189 
2190   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2191   __ Mov(num_chr, temp);
2192 
2193   // The main loop, used for longer copies, loads and stores 4 x 16-bit characters at a time.
2194   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2195   // to rectify these everywhere this intrinsic applies.)
2196   __ Bind(&loop);
2197   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2198   __ Subs(num_chr, num_chr, 4);
2199   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2200   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2201   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2202   temps.Release(temp);
2203   __ B(ge, &loop, /* is_far_target= */ false);
2204 
2205   __ Adds(num_chr, num_chr, 4);
2206   __ B(eq, final_label, /* is_far_target= */ false);
2207 
2208   // Main loop for < 4 character case and remainder handling. Loads and stores one
2209   // 16-bit Java character at a time.
2210   __ Bind(&remainder);
2211   temp = temps.Acquire();
2212   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2213   __ Subs(num_chr, num_chr, 1);
2214   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2215   temps.Release(temp);
2216   __ B(gt, &remainder, /* is_far_target= */ false);
2217 
2218   if (mirror::kUseStringCompression) {
2219     __ B(final_label);
2220 
2221     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2222     DCHECK_EQ(c_char_size, 1u);
2223     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2224     __ Bind(&compressed_string_preloop);
2225     __ Add(src_ptr, src_ptr, srcBegin);
2226     __ Bind(&compressed_string_loop);
2227     temp = temps.Acquire();
2228     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2229     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2230     temps.Release(temp);
2231     __ Subs(num_chr, num_chr, 1);
2232     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2233   }
2234 
2235   if (done.IsReferenced()) {
2236     __ Bind(&done);
2237   }
2238 }
2239 
2240 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2241   CreateFPToIntLocations(allocator_, invoke);
2242 }
2243 
2244 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2245   ArmVIXLAssembler* const assembler = GetAssembler();
2246   const vixl32::Register out = OutputRegister(invoke);
2247   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2248   // we don't care about the sign bit anyway.
2249   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
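  // With illustrative bit patterns: bits(+Inf) == 0x7F800000 and bits(-Inf) == 0xFF800000 both
  // become 0xFF000000 after the left shift, which equals `infinity`, so the EOR below yields
  // zero exactly for the two infinities.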
2250 
2251   __ Vmov(out, InputSRegisterAt(invoke, 0));
2252   // We don't care about the sign bit, so shift left.
2253   __ Lsl(out, out, 1);
2254   __ Eor(out, out, infinity);
2255   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2256 }
2257 
2258 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2259   CreateFPToIntLocations(allocator_, invoke);
2260 }
2261 
2262 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2263   ArmVIXLAssembler* const assembler = GetAssembler();
2264   const vixl32::Register out = OutputRegister(invoke);
2265   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2266   const vixl32::Register temp = temps.Acquire();
2267   // The high 32 bits of double-precision positive infinity, split into two constants
2268   // encodable as immediate operands.
2269   constexpr uint32_t infinity_high  = 0x7f000000U;
2270   constexpr uint32_t infinity_high2 = 0x00f00000U;
2271 
2272   static_assert((infinity_high | infinity_high2) ==
2273                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2274                 "The constants do not add up to the high 32 bits of double "
2275                 "precision positive infinity.");
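  // The sequence below leaves `out` equal to zero iff the input is +/- infinity: the high word
  // is XORed with 0x7ff00000, shifted left by one to drop the sign bit, and ORed with the low
  // word, so any other bit pattern produces a non-zero result.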
2276   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2277   __ Eor(out, out, infinity_high);
2278   __ Eor(out, out, infinity_high2);
2279   // We don't care about the sign bit, so shift left.
2280   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2281   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2282 }
2283 
2284 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2285   if (features_.HasARMv8AInstructions()) {
2286     CreateFPToFPLocations(allocator_, invoke);
2287   }
2288 }
2289 
2290 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2291   ArmVIXLAssembler* assembler = GetAssembler();
2292   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2293   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2294 }
2295 
2296 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2297   if (features_.HasARMv8AInstructions()) {
2298     CreateFPToFPLocations(allocator_, invoke);
2299   }
2300 }
2301 
2302 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2303   ArmVIXLAssembler* assembler = GetAssembler();
2304   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2305   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2306 }
2307 
2308 #define VISIT_INTRINSIC(name, low, high, type, start_index)                                       \
2309   void IntrinsicLocationsBuilderARMVIXL::Visit##name##ValueOf(HInvoke* invoke) {                  \
2310     InvokeRuntimeCallingConventionARMVIXL calling_convention;                                     \
2311     IntrinsicVisitor::ComputeValueOfLocations(invoke,                                             \
2312                                               codegen_,                                           \
2313                                               low,                                                \
2314                                               (high) - (low) + 1,                                 \
2315                                               LocationFrom(r0),                                   \
2316                                               LocationFrom(calling_convention.GetRegisterAt(0))); \
2317   }                                                                                               \
2318   void IntrinsicCodeGeneratorARMVIXL::Visit##name##ValueOf(HInvoke* invoke) {                     \
2319     IntrinsicVisitor::ValueOfInfo info =                                                          \
2320         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                              \
2321                                              codegen_->GetCompilerOptions(),                      \
2322                                              WellKnownClasses::java_lang_##name##_value,          \
2323                                              low,                                                 \
2324                                              (high) - (low) + 1,                                  \
2325                                              start_index);                                        \
2326     HandleValueOf(invoke, info, type);                                                            \
2327   }
2328   BOXED_TYPES(VISIT_INTRINSIC)
2329 #undef VISIT_INTRINSIC
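// The macro above is expanded once per entry in BOXED_TYPES, wiring each boxed type's
// valueOf() intrinsic to the shared HandleValueOf() below with that type's cache bounds
// and value field.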
2330 
2331 
2332 void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke,
2333                                                   const IntrinsicVisitor::ValueOfInfo& info,
2334                                                   DataType::Type type) {
2335   LocationSummary* locations = invoke->GetLocations();
2336   ArmVIXLAssembler* const assembler = GetAssembler();
2337 
2338   vixl32::Register out = RegisterFrom(locations->Out());
2339   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2340   vixl32::Register temp = temps.Acquire();
2341   auto allocate_instance = [&]() {
2342     DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2343     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2344     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2345     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2346   };
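  // Note: the cache bounds checks below rely on a single unsigned comparison; the subtraction
  // wraps around for values below `info.low`, so `value - info.low < info.length` holds iff
  // the value lies in [info.low, info.low + info.length).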
2347   if (invoke->InputAt(0)->IsIntConstant()) {
2348     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2349     if (static_cast<uint32_t>(value - info.low) < info.length) {
2350       // Just embed the object in the code.
2351       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
2352       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2353     } else {
2354       DCHECK(locations->CanCall());
2355       // Allocate and initialize a new object.
2356       // TODO: If we JIT, we could allocate the object now, and store it in the
2357       // JIT object table.
2358       allocate_instance();
2359       __ Mov(temp, value);
2360       assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset);
2361       // Class pointer and `value` final field stores require a barrier before publication.
2362       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2363     }
2364   } else {
2365     DCHECK(locations->CanCall());
2366     vixl32::Register in = RegisterFrom(locations->InAt(0));
2367     // Check bounds of our cache.
2368     __ Add(out, in, -info.low);
2369     __ Cmp(out, info.length);
2370     vixl32::Label allocate, done;
2371     __ B(hs, &allocate, /* is_far_target= */ false);
2372     // If the value is within the bounds, load the object directly from the array.
2373     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2374     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2375     assembler->MaybeUnpoisonHeapReference(out);
2376     __ B(&done);
2377     __ Bind(&allocate);
2378     // Otherwise allocate and initialize a new object.
2379     allocate_instance();
2380     assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset);
2381     // Class pointer and `value` final field stores require a barrier before publication.
2382     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2383     __ Bind(&done);
2384   }
2385 }
2386 
2387 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2388   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2389 }
2390 
2391 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2392   ArmVIXLAssembler* assembler = GetAssembler();
2393   LocationSummary* locations = invoke->GetLocations();
2394 
2395   Location obj = locations->InAt(0);
2396   Location out = locations->Out();
2397 
2398   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2399   codegen_->AddSlowPath(slow_path);
2400 
2401   if (codegen_->EmitReadBarrier()) {
2402     // Check self->GetWeakRefAccessEnabled().
2403     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2404     vixl32::Register temp = temps.Acquire();
2405     __ Ldr(temp,
2406            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2407     __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled));
2408     __ B(ne, slow_path->GetEntryLabel());
2409   }
2410 
2411   {
2412     // Load the java.lang.ref.Reference class.
2413     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2414     vixl32::Register temp = temps.Acquire();
2415     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2416 
2417     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2418     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2419     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2420     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2421               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
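    // The 16-bit load below reads both byte-sized flags at once; a non-zero result means at
    // least one of them is set and we must take the slow path.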
2422     __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2423     __ Cmp(temp, 0);
2424     __ B(ne, slow_path->GetEntryLabel());
2425   }
2426 
2427   // Load the value from the field.
2428   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2429   if (codegen_->EmitBakerReadBarrier()) {
2430     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2431                                                     out,
2432                                                     RegisterFrom(obj),
2433                                                     referent_offset,
2434                                                     /*maybe_temp=*/ Location::NoLocation(),
2435                                                     /*needs_null_check=*/ true);
2436     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2437   } else {
2438     {
2439       vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2440       __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2441       codegen_->MaybeRecordImplicitNullCheck(invoke);
2442     }
2443     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2444     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2445   }
2446   __ Bind(slow_path->GetExitLabel());
2447 }
2448 
2449 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2450   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
2451 }
2452 
2453 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2454   LocationSummary* locations = invoke->GetLocations();
2455   ArmVIXLAssembler* assembler = GetAssembler();
2456   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2457 
2458   vixl32::Register obj = RegisterFrom(locations->InAt(0));
2459   vixl32::Register other = RegisterFrom(locations->InAt(1));
2460   vixl32::Register out = RegisterFrom(locations->Out());
2461   vixl32::Register tmp = temps.Acquire();
2462 
2463   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2464   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2465 
2466   {
2467     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2468     // Loading scratch register always uses 32-bit encoding.
2469     vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2470                                  vixl32::k32BitT32InstructionSizeInBytes);
2471     __ ldr(tmp, MemOperand(obj, referent_offset));
2472     codegen_->MaybeRecordImplicitNullCheck(invoke);
2473   }
2474   assembler->MaybeUnpoisonHeapReference(tmp);
2475   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2476 
2477   if (codegen_->EmitReadBarrier()) {
2478     DCHECK(kUseBakerReadBarrier);
2479 
2480     vixl32::Label calculate_result;
2481     __ Subs(out, tmp, other);
2482     __ B(eq, &calculate_result);  // `out` is 0 if taken.
2483 
2484     // Check if the loaded reference is null.
2485     __ Cmp(tmp, 0);
2486     __ B(eq, &calculate_result);  // `out` is not 0 if taken.
2487 
2488     // For correct memory visibility, we need a barrier before loading the lock word, but
2489     // the barrier already emitted for the volatile load above is sufficient.
2490 
2491     // Load the lockword and check if it is a forwarding address.
2492     static_assert(LockWord::kStateShift == 30u);
2493     static_assert(LockWord::kStateForwardingAddress == 3u);
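    // With the state in the top two bits, the lock word holds a forwarding address iff both
    // bits are set, i.e. iff the word is unsigned-higher-or-same than 0xc0000000; the address
    // itself is recovered below by shifting left by kForwardingAddressShift.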
2494     __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2495     __ Cmp(tmp, Operand(0xc0000000));
2496     __ B(lo, &calculate_result);   // `out` is not 0 if taken.
2497 
2498     // Extract the forwarding address and subtract from `other`.
2499     __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2500 
2501     __ Bind(&calculate_result);
2502   } else {
2503     DCHECK(!codegen_->EmitReadBarrier());
2504     __ Sub(out, tmp, other);
2505   }
2506 
2507   // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
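  // CLZ yields 32 (the register width) only for a zero input, so shifting right by
  // log2(32) == 5 leaves 1 exactly when `out` was zero and 0 otherwise.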
2508   __ Clz(out, out);
2509   __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
2510 }
2511 
2512 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2513   LocationSummary* locations =
2514       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2515   locations->SetOut(Location::RequiresRegister());
2516 }
2517 
2518 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2519   ArmVIXLAssembler* assembler = GetAssembler();
2520   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2521   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2522   __ Ldr(out, MemOperand(tr, offset));
2523   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2524   vixl32::Register temp = temps.Acquire();
2525   vixl32::Label done;
2526   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2527   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
2528   __ Dmb(vixl32::ISH);
2529   __ Mov(temp, 0);
2530   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2531   __ Dmb(vixl32::ISH);
2532   if (done.IsReferenced()) {
2533     __ Bind(&done);
2534   }
2535 }
2536 
2537 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2538   LocationSummary* locations =
2539       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2540   locations->SetInAt(0, Location::Any());
2541 }
2542 
2543 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
2544 
2545 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2546   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2547 }
2548 
2549 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2550   ArmVIXLAssembler* assembler = GetAssembler();
2551   LocationSummary* locations = invoke->GetLocations();
2552   vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2553   vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2554   vixl32::Register out = RegisterFrom(locations->Out());
2555 
2556   // Check if divisor is zero, bail to managed implementation to handle.
2557   SlowPathCodeARMVIXL* slow_path =
2558       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2559   codegen_->AddSlowPath(slow_path);
2560   __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2561 
2562   __ Udiv(out, dividend, divisor);
2563 
2564   __ Bind(slow_path->GetExitLabel());
2565 }
2566 
2567 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2568   return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2569 }
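// When LDRD/STRD are not single-copy atomic on this core, atomic 64-bit accesses below are
// emulated with LDREXD/STREXD loops, which is why some locations add extra temporaries.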
2570 
2571 static void GenerateIntrinsicGet(HInvoke* invoke,
2572                                  CodeGeneratorARMVIXL* codegen,
2573                                  DataType::Type type,
2574                                  std::memory_order order,
2575                                  bool atomic,
2576                                  vixl32::Register base,
2577                                  vixl32::Register offset,
2578                                  Location out,
2579                                  Location maybe_temp,
2580                                  Location maybe_temp2,
2581                                  Location maybe_temp3) {
2582   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2583   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2584   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2585   DCHECK(atomic || order == std::memory_order_relaxed);
2586 
2587   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2588   MemOperand address(base, offset);
2589   switch (type) {
2590     case DataType::Type::kBool:
2591       __ Ldrb(RegisterFrom(out), address);
2592       break;
2593     case DataType::Type::kInt8:
2594       __ Ldrsb(RegisterFrom(out), address);
2595       break;
2596     case DataType::Type::kUint16:
2597       __ Ldrh(RegisterFrom(out), address);
2598       break;
2599     case DataType::Type::kInt16:
2600       __ Ldrsh(RegisterFrom(out), address);
2601       break;
2602     case DataType::Type::kInt32:
2603       __ Ldr(RegisterFrom(out), address);
2604       break;
2605     case DataType::Type::kInt64:
2606       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
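        // Read the value with an LDREXD/STREXD pair that stores the loaded value straight
        // back; retrying until the store-exclusive succeeds guarantees the 64-bit read was
        // observed atomically even though a plain LDRD is not single-copy atomic here.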
2607         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2608         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2609         const vixl32::Register temp_reg = temps.Acquire();
2610         __ Add(temp_reg, base, offset);
2611         vixl32::Label loop;
2612         __ Bind(&loop);
2613         __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2614         __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2615         __ Cmp(strexd_tmp, 0);
2616         __ B(ne, &loop);
2617       } else {
2618         __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2619       }
2620       break;
2621     case DataType::Type::kReference:
2622       if (codegen->EmitBakerReadBarrier()) {
2623         // Piggy-back on the field load path using introspection for the Baker read barrier.
2624         vixl32::Register temp = RegisterFrom(maybe_temp);
2625         __ Add(temp, base, offset);
2626         codegen->GenerateFieldLoadWithBakerReadBarrier(
2627             invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2628       } else {
2629         __ Ldr(RegisterFrom(out), address);
2630       }
2631       break;
2632     case DataType::Type::kFloat32: {
2633       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2634       const vixl32::Register temp_reg = temps.Acquire();
2635       __ Add(temp_reg, base, offset);
2636       __ Vldr(SRegisterFrom(out), MemOperand(temp_reg));
2637       break;
2638     }
2639     case DataType::Type::kFloat64: {
2640       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2641       const vixl32::Register temp_reg = temps.Acquire();
2642       __ Add(temp_reg, base, offset);
2643       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2644         vixl32::Register lo = RegisterFrom(maybe_temp);
2645         vixl32::Register hi = RegisterFrom(maybe_temp2);
2646         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2647         vixl32::Label loop;
2648         __ Bind(&loop);
2649         __ Ldrexd(lo, hi, MemOperand(temp_reg));
2650         __ Strexd(strexd_tmp, lo, hi, MemOperand(temp_reg));
2651         __ Cmp(strexd_tmp, 0);
2652         __ B(ne, &loop);
2653         __ Vmov(DRegisterFrom(out), lo, hi);
2654       } else {
2655         __ Vldr(DRegisterFrom(out), MemOperand(temp_reg));
2656       }
2657       break;
2658     }
2659     default:
2660       LOG(FATAL) << "Unexpected type " << type;
2661       UNREACHABLE();
2662   }
2663   if (acquire_barrier) {
2664     codegen->GenerateMemoryBarrier(
2665         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2666   }
2667   if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) {
2668     Location base_loc = LocationFrom(base);
2669     Location index_loc = LocationFrom(offset);
2670     codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2671   }
2672 }
2673 
2674 static void CreateUnsafeGetLocations(HInvoke* invoke,
2675                                      CodeGeneratorARMVIXL* codegen,
2676                                      DataType::Type type,
2677                                      bool atomic) {
2678   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
2679   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2680   LocationSummary* locations =
2681       new (allocator) LocationSummary(invoke,
2682                                       can_call
2683                                           ? LocationSummary::kCallOnSlowPath
2684                                           : LocationSummary::kNoCall,
2685                                       kIntrinsified);
2686   if (can_call && kUseBakerReadBarrier) {
2687     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2688   }
2689   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2690   locations->SetInAt(1, Location::RequiresRegister());
2691   locations->SetInAt(2, Location::RequiresRegister());
2692   locations->SetOut(Location::RequiresRegister(),
2693                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2694   if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2695       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2696     // We need a temporary register for the read barrier marking slow
2697     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2698     // or the STREXD result for LDREXD/STREXD sequence when LDRD is non-atomic.
2699     locations->AddTemp(Location::RequiresRegister());
2700   }
2701 }
2702 
2703 static void GenUnsafeGet(HInvoke* invoke,
2704                          CodeGeneratorARMVIXL* codegen,
2705                          DataType::Type type,
2706                          std::memory_order order,
2707                          bool atomic) {
2708   LocationSummary* locations = invoke->GetLocations();
2709   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
2710   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
2711   Location out = locations->Out();
2712   Location maybe_temp = Location::NoLocation();
2713   if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
2714       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2715     maybe_temp = locations->GetTemp(0);
2716   }
2717   GenerateIntrinsicGet(invoke,
2718                        codegen,
2719                        type,
2720                        order,
2721                        atomic,
2722                        base,
2723                        offset,
2724                        out,
2725                        maybe_temp,
2726                        /*maybe_temp2=*/ Location::NoLocation(),
2727                        /*maybe_temp3=*/ Location::NoLocation());
2728 }
2729 
2730 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2731   VisitJdkUnsafeGet(invoke);
2732 }
2733 
2734 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2735   VisitJdkUnsafeGet(invoke);
2736 }
2737 
2738 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2739   VisitJdkUnsafeGetVolatile(invoke);
2740 }
2741 
2742 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2743   VisitJdkUnsafeGetVolatile(invoke);
2744 }
2745 
2746 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2747   VisitJdkUnsafeGetLong(invoke);
2748 }
2749 
2750 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2751   VisitJdkUnsafeGetLong(invoke);
2752 }
2753 
2754 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2755   VisitJdkUnsafeGetLongVolatile(invoke);
2756 }
2757 
2758 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2759   VisitJdkUnsafeGetLongVolatile(invoke);
2760 }
2761 
2762 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2763   VisitJdkUnsafeGetReference(invoke);
2764 }
2765 
2766 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2767   VisitJdkUnsafeGetReference(invoke);
2768 }
2769 
2770 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2771   VisitJdkUnsafeGetReferenceVolatile(invoke);
2772 }
2773 
2774 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2775   VisitJdkUnsafeGetReferenceVolatile(invoke);
2776 }
2777 
2778 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2779   VisitJdkUnsafeGetByte(invoke);
2780 }
2781 
2782 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
2783   VisitJdkUnsafeGetByte(invoke);
2784 }
2785 
2786 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2787   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2788 }
2789 
2790 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2791   GenUnsafeGet(
2792       invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2793 }
2794 
2795 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2796   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2797 }
2798 
2799 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2800   GenUnsafeGet(
2801       invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2802 }
2803 
2804 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2805   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2806 }
2807 
2808 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2809   GenUnsafeGet(
2810       invoke, codegen_, DataType::Type::kInt32, std::memory_order_acquire, /*atomic=*/ true);
2811 }
2812 
2813 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2814   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2815 }
2816 
2817 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2818   GenUnsafeGet(
2819       invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2820 }
2821 
2822 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2823   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2824 }
2825 
2826 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2827   GenUnsafeGet(
2828       invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2829 }
2830 
2831 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2832   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2833 }
2834 
2835 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2836   GenUnsafeGet(
2837       invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
2838 }
2839 
2840 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2841   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2842 }
2843 
2844 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2845   GenUnsafeGet(
2846       invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2847 }
2848 
2849 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2850   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2851 }
2852 
2853 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2854   GenUnsafeGet(
2855       invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2856 }
2857 
2858 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2859   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2860 }
2861 
2862 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2863   GenUnsafeGet(
2864       invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
2865 }
2866 
2867 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2868   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
2869 }
2870 
2871 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2872   GenUnsafeGet(
2873       invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false);
2874 }
2875 
2876 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
2877                                  DataType::Type type,
2878                                  std::memory_order order,
2879                                  bool atomic,
2880                                  vixl32::Register base,
2881                                  vixl32::Register offset,
2882                                  Location value,
2883                                  Location maybe_temp,
2884                                  Location maybe_temp2,
2885                                  Location maybe_temp3) {
2886   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2887   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
2888   DCHECK(release_barrier || order == std::memory_order_relaxed);
2889   DCHECK(atomic || order == std::memory_order_relaxed);
2890 
2891   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2892   if (release_barrier) {
2893     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
2894   }
2895   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2896   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2897     vixl32::Register temp = temps.Acquire();
2898     __ Mov(temp, RegisterFrom(value));
2899     assembler->PoisonHeapReference(temp);
2900     value = LocationFrom(temp);
2901   }
2902   MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
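  // The instructions used below for 64-bit and FP values (VSTR, STRD, STREXD) do not accept
  // a register offset here, so materialize `base + offset` into a scratch register first.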
2903   if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
2904     const vixl32::Register temp_reg = temps.Acquire();
2905     __ Add(temp_reg, base, offset);
2906     address = MemOperand(temp_reg);
2907   }
2908   switch (type) {
2909     case DataType::Type::kBool:
2910     case DataType::Type::kInt8:
2911       __ Strb(RegisterFrom(value), address);
2912       break;
2913     case DataType::Type::kUint16:
2914     case DataType::Type::kInt16:
2915       __ Strh(RegisterFrom(value), address);
2916       break;
2917     case DataType::Type::kReference:
2918     case DataType::Type::kInt32:
2919       __ Str(RegisterFrom(value), address);
2920       break;
2921     case DataType::Type::kInt64:
2922       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2923         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2924         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2925         vixl32::Label loop;
2926         __ Bind(&loop);
2927         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2928         __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2929         __ Cmp(lo_tmp, 0);
2930         __ B(ne, &loop);
2931       } else {
2932         __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2933       }
2934       break;
2935     case DataType::Type::kFloat32:
2936       __ Vstr(SRegisterFrom(value), address);
2937       break;
2938     case DataType::Type::kFloat64:
2939       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2940         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2941         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2942         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2943         vixl32::Label loop;
2944         __ Bind(&loop);
2945         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2946         __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2947         __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2948         __ Cmp(strexd_tmp, 0);
2949         __ B(ne, &loop);
2950       } else {
2951         __ Vstr(DRegisterFrom(value), address);
2952       }
2953       break;
2954     default:
2955       LOG(FATAL) << "Unexpected type " << type;
2956       UNREACHABLE();
2957   }
2958   if (seq_cst_barrier) {
2959     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2960   }
2961 }
2962 
2963 static void CreateUnsafePutLocations(HInvoke* invoke,
2964                                      CodeGeneratorARMVIXL* codegen,
2965                                      DataType::Type type,
2966                                      bool atomic) {
2967   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2968   LocationSummary* locations =
2969       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2970   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2971   locations->SetInAt(1, Location::RequiresRegister());
2972   locations->SetInAt(2, Location::RequiresRegister());
2973   locations->SetInAt(3, Location::RequiresRegister());
2974 
2975   if (type == DataType::Type::kInt64) {
2976     // Potentially need temps for ldrexd-strexd loop.
2977     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2978       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
2979       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
2980     }
2981   } else if (type == DataType::Type::kReference) {
2982     // Temp for card-marking.
2983     locations->AddTemp(Location::RequiresRegister());  // Temp.
2984   }
2985 }
2986 
2987 static void GenUnsafePut(HInvoke* invoke,
2988                          DataType::Type type,
2989                          std::memory_order order,
2990                          bool atomic,
2991                          CodeGeneratorARMVIXL* codegen) {
2992   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2993 
2994   LocationSummary* locations = invoke->GetLocations();
2995   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
2996   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
2997   Location value = locations->InAt(3);
2998   Location maybe_temp = Location::NoLocation();
2999   Location maybe_temp2 = Location::NoLocation();
3000   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3001     maybe_temp = locations->GetTemp(0);
3002     maybe_temp2 = locations->GetTemp(1);
3003   }
3004 
3005   GenerateIntrinsicSet(codegen,
3006                        type,
3007                        order,
3008                        atomic,
3009                        base,
3010                        offset,
3011                        value,
3012                        maybe_temp,
3013                        maybe_temp2,
3014                        /*maybe_temp3=*/ Location::NoLocation());
3015 
3016   if (type == DataType::Type::kReference) {
3017     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3018     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3019     vixl32::Register card = temps.Acquire();
3020     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3021     codegen->MaybeMarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3022   }
3023 }
3024 
3025 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3026   VisitJdkUnsafePut(invoke);
3027 }
3028 
3029 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3030   VisitJdkUnsafePut(invoke);
3031 }
3032 
3033 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3034   VisitJdkUnsafePutOrdered(invoke);
3035 }
3036 
3037 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3038   VisitJdkUnsafePutOrdered(invoke);
3039 }
3040 
3041 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3042   VisitJdkUnsafePutVolatile(invoke);
3043 }
3044 
3045 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3046   VisitJdkUnsafePutVolatile(invoke);
3047 }
3048 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3049   VisitJdkUnsafePutReference(invoke);
3050 }
3051 
3052 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3053   VisitJdkUnsafePutReference(invoke);
3054 }
3055 
3056 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3057   VisitJdkUnsafePutObjectOrdered(invoke);
3058 }
3059 
3060 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3061   VisitJdkUnsafePutObjectOrdered(invoke);
3062 }
3063 
3064 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3065   VisitJdkUnsafePutReferenceVolatile(invoke);
3066 }
3067 
3068 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3069   VisitJdkUnsafePutReferenceVolatile(invoke);
3070 }
3071 
3072 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3073   VisitJdkUnsafePutLong(invoke);
3074 }
3075 
3076 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3077   VisitJdkUnsafePutLong(invoke);
3078 }
3079 
3080 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3081   VisitJdkUnsafePutLongOrdered(invoke);
3082 }
3083 
3084 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3085   VisitJdkUnsafePutLongOrdered(invoke);
3086 }
3087 
3088 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3089   VisitJdkUnsafePutLongVolatile(invoke);
3090 }
3091 
3092 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3093   VisitJdkUnsafePutLongVolatile(invoke);
3094 }
3095 
3096 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3097   VisitJdkUnsafePutByte(invoke);
3098 }
3099 
3100 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
3101   VisitJdkUnsafePutByte(invoke);
3102 }
3103 
3104 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3105   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3106 }
3107 
3108 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3109   GenUnsafePut(invoke,
3110                DataType::Type::kInt32,
3111                std::memory_order_relaxed,
3112                /*atomic=*/ false,
3113                codegen_);
3114 }
3115 
3116 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3117   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
3118 }
3119 
3120 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
3121   GenUnsafePut(invoke,
3122                DataType::Type::kInt8,
3123                std::memory_order_relaxed,
3124                /*atomic=*/ false,
3125                codegen_);
3126 }
3127 
3128 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3129   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3130 }
3131 
3132 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3133   GenUnsafePut(invoke,
3134                DataType::Type::kInt32,
3135                std::memory_order_release,
3136                /*atomic=*/ true,
3137                codegen_);
3138 }
3139 
3140 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3141   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3142 }
3143 
3144 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3145   GenUnsafePut(invoke,
3146                DataType::Type::kInt32,
3147                std::memory_order_seq_cst,
3148                /*atomic=*/ true,
3149                codegen_);
3150 }
3151 
3152 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3153   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3154 }
3155 
3156 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3157   GenUnsafePut(invoke,
3158                DataType::Type::kInt32,
3159                std::memory_order_release,
3160                /*atomic=*/ true,
3161                codegen_);
3162 }
3163 
3164 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3165   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3166 }
3167 
3168 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
3169   GenUnsafePut(invoke,
3170                DataType::Type::kReference,
3171                std::memory_order_relaxed,
3172                /*atomic=*/ false,
3173                codegen_);
3174 }
3175 
3176 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3177   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3178 }
3179 
3180 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3181   GenUnsafePut(invoke,
3182                DataType::Type::kReference,
3183                std::memory_order_release,
3184                /*atomic=*/ true,
3185                codegen_);
3186 }
3187 
3188 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3189   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3190 }
3191 
3192 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
3193   GenUnsafePut(invoke,
3194                DataType::Type::kReference,
3195                std::memory_order_seq_cst,
3196                /*atomic=*/ true,
3197                codegen_);
3198 }
3199 
3200 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3201   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3202 }
3203 
3204 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
3205   GenUnsafePut(invoke,
3206                DataType::Type::kReference,
3207                std::memory_order_release,
3208                /*atomic=*/ true,
3209                codegen_);
3210 }
3211 
3212 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3213   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3214 }
3215 
3216 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3217   GenUnsafePut(invoke,
3218                DataType::Type::kInt64,
3219                std::memory_order_relaxed,
3220                /*atomic=*/ false,
3221                codegen_);
3222 }
3223 
3224 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3225   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3226 }
3227 
3228 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3229   GenUnsafePut(invoke,
3230                DataType::Type::kInt64,
3231                std::memory_order_release,
3232                /*atomic=*/ true,
3233                codegen_);
3234 }
3235 
3236 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3237   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3238 }
3239 
3240 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3241   GenUnsafePut(invoke,
3242                DataType::Type::kInt64,
3243                std::memory_order_seq_cst,
3244                /*atomic=*/ true,
3245                codegen_);
3246 }
3247 
3248 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3249   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3250 }
3251 
3252 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3253   GenUnsafePut(invoke,
3254                DataType::Type::kInt64,
3255                std::memory_order_release,
3256                /*atomic=*/ true,
3257                codegen_);
3258 }
3259 
3260 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3261                               DataType::Type type,
3262                               vixl32::Register ptr,
3263                               Location old_value) {
3264   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3265   switch (type) {
3266     case DataType::Type::kBool:
3267     case DataType::Type::kInt8:
3268       __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3269       break;
3270     case DataType::Type::kUint16:
3271     case DataType::Type::kInt16:
3272       __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3273       break;
3274     case DataType::Type::kInt32:
3275     case DataType::Type::kReference:
3276       __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3277       break;
3278     case DataType::Type::kInt64:
3279       __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3280       break;
3281     default:
3282       LOG(FATAL) << "Unexpected type: " << type;
3283       UNREACHABLE();
3284   }
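  // LDREXB/LDREXH zero-extend the loaded value, so sign-extend narrow signed types (and
  // unpoison references) to make the value comparable with `expected` in the CAS loop.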
3285   switch (type) {
3286     case DataType::Type::kInt8:
3287       __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3288       break;
3289     case DataType::Type::kInt16:
3290       __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3291       break;
3292     case DataType::Type::kReference:
3293       assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3294       break;
3295     default:
3296       break;
3297   }
3298 }
3299 
3300 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3301                                DataType::Type type,
3302                                vixl32::Register ptr,
3303                                vixl32::Register store_result,
3304                                Location new_value) {
3305   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3306   if (type == DataType::Type::kReference) {
3307     assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3308   }
3309   switch (type) {
3310     case DataType::Type::kBool:
3311     case DataType::Type::kInt8:
3312       __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3313       break;
3314     case DataType::Type::kUint16:
3315     case DataType::Type::kInt16:
3316       __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3317       break;
3318     case DataType::Type::kInt32:
3319     case DataType::Type::kReference:
3320       __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3321       break;
3322     case DataType::Type::kInt64:
3323       __ Strexd(
3324           store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3325       break;
3326     default:
3327       LOG(FATAL) << "Unexpected type: " << type;
3328       UNREACHABLE();
3329   }
3330   if (type == DataType::Type::kReference) {
3331     assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3332   }
3333 }
3334 
3335 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3336                                   DataType::Type type,
3337                                   bool strong,
3338                                   vixl32::Label* cmp_failure,
3339                                   bool cmp_failure_is_far_target,
3340                                   vixl32::Register ptr,
3341                                   Location expected,
3342                                   Location new_value,
3343                                   Location old_value,
3344                                   vixl32::Register store_result,
3345                                   vixl32::Register success) {
3346   // For kReference, `expected` shall be a register pair when called from a read barrier
3347   // slow path: it holds both the original `expected` value and the unmarked old value from
3348   // the main path's CAS attempt, which matched `expected` only after marking.
3349   // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
3350   if (type == DataType::Type::kInt64) {
3351     DCHECK(expected.IsRegisterPair());
3352     DCHECK(new_value.IsRegisterPair());
3353     DCHECK(old_value.IsRegisterPair());
3354   } else {
3355     DCHECK(expected.IsRegister() ||
3356            (type == DataType::Type::kReference && expected.IsRegisterPair()));
3357     DCHECK(new_value.IsRegister());
3358     DCHECK(old_value.IsRegister());
3359     // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
3360     DCHECK(!expected.Contains(LocationFrom(store_result)));
3361   }
3362 
3363   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3364 
3365   // do {
3366   //   old_value = [ptr];  // Load exclusive.
3367   //   if (old_value != expected) goto cmp_failure;
3368   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
3369   // } while (strong && store_result);
3370   //
3371   // If `success` is a valid register, there are additional instructions in the above code
3372   // to report success with value 1 and failure with value 0 in that register.
3373 
3374   vixl32::Label loop_head;
3375   if (strong) {
3376     __ Bind(&loop_head);
3377   }
3378   EmitLoadExclusive(codegen, type, ptr, old_value);
3379   // We do not need to initialize the failure code for comparison failure if the
3380   // branch goes to the read barrier slow path that clobbers `success` anyway.
3381   bool init_failure_for_cmp =
3382       success.IsValid() &&
3383       !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister());
3384   // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3385   // is essentially free, so prepare the failure value here if we can.
3386   bool init_failure_for_cmp_early =
3387       init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3388   if (init_failure_for_cmp_early) {
3389     __ Mov(success, 0);  // Indicate failure if the comparison fails.
3390   }
3391   if (type == DataType::Type::kInt64) {
3392     __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3393     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3394     __ it(eq);
3395     __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3396   } else if (expected.IsRegisterPair()) {
3397     DCHECK_EQ(type, DataType::Type::kReference);
3398     DCHECK(!expected.Contains(old_value));
3399     // Check if the loaded value matches any of the two registers in `expected`.
3400     __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3401     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3402     __ it(ne);
3403     __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3404   } else {
3405     __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3406   }
3407   if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3408     __ Mov(LeaveFlags, success, 0);  // Indicate failure if the comparison fails.
3409   }
3410   __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3411   EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3412   if (strong) {
3413     // Instruction scheduling: Loading a constant between STREX* and using its result
3414     // is essentially free, so prepare the success value here if needed and possible.
3415     if (success.IsValid() && !success.Is(store_result)) {
3416       __ Mov(success, 1);  // Indicate success if the store succeeds.
3417     }
3418     __ Cmp(store_result, 0);
3419     if (success.IsValid() && success.Is(store_result)) {
3420       __ Mov(LeaveFlags, success, 1);  // Indicate success if the store succeeds.
3421     }
3422     __ B(ne, &loop_head, /*is_far_target=*/ false);
3423   } else {
3424     // Weak CAS (VarHandle.weakCompareAndSet variants) always needs to indicate success or failure.
3425     DCHECK(success.IsValid());
3426     // Flip the `store_result` to indicate success by 1 and failure by 0.
3427     __ Eor(success, store_result, 1);
3428   }
3429 }
3430 
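// Slow path for a reference CAS when read barriers are enabled: the value read by the main path
// did not compare equal to `expected`, so it may be an unmarked from-space reference to the same
// object. The slow path marks it and, if the marked value matches `expected`, retries the CAS
// accepting either the to-space or the from-space reference (see EmitNativeCode() below).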
3431 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3432  public:
3433   explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3434                                          bool strong,
3435                                          vixl32::Register base,
3436                                          vixl32::Register offset,
3437                                          vixl32::Register expected,
3438                                          vixl32::Register new_value,
3439                                          vixl32::Register old_value,
3440                                          vixl32::Register old_value_temp,
3441                                          vixl32::Register store_result,
3442                                          vixl32::Register success,
3443                                          CodeGeneratorARMVIXL* arm_codegen)
3444       : SlowPathCodeARMVIXL(invoke),
3445         strong_(strong),
3446         base_(base),
3447         offset_(offset),
3448         expected_(expected),
3449         new_value_(new_value),
3450         old_value_(old_value),
3451         old_value_temp_(old_value_temp),
3452         store_result_(store_result),
3453         success_(success),
3454         mark_old_value_slow_path_(nullptr),
3455         update_old_value_slow_path_(nullptr) {
3456     if (!kUseBakerReadBarrier) {
3457       // We need to add the slow path now, it is too late when emitting slow path code.
3458       mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3459           invoke,
3460           Location::RegisterLocation(old_value_temp.GetCode()),
3461           Location::RegisterLocation(old_value.GetCode()),
3462           Location::RegisterLocation(base.GetCode()),
3463           /*offset=*/ 0u,
3464           /*index=*/ Location::RegisterLocation(offset.GetCode()));
3465       if (!success.IsValid()) {
3466         update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3467             invoke,
3468             Location::RegisterLocation(old_value.GetCode()),
3469             Location::RegisterLocation(old_value_temp.GetCode()),
3470             Location::RegisterLocation(base.GetCode()),
3471             /*offset=*/ 0u,
3472             /*index=*/ Location::RegisterLocation(offset.GetCode()));
3473       }
3474     }
3475   }
3476 
3477   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3478 
3479   void EmitNativeCode(CodeGenerator* codegen) override {
3480     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3481     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3482     __ Bind(GetEntryLabel());
3483 
3484     // Mark the `old_value_` from the main path and compare with `expected_`.
3485     if (kUseBakerReadBarrier) {
3486       DCHECK(mark_old_value_slow_path_ == nullptr);
3487       arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3488     } else {
3489       DCHECK(mark_old_value_slow_path_ != nullptr);
3490       __ B(mark_old_value_slow_path_->GetEntryLabel());
3491       __ Bind(mark_old_value_slow_path_->GetExitLabel());
3492     }
3493     __ Cmp(old_value_temp_, expected_);
3494     if (success_.IsValid()) {
3495       __ Mov(LeaveFlags, success_, 0);  // Indicate failure if we take the branch out.
3496     } else {
3497       // In case of failure, update the `old_value_` with the marked reference.
3498       ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3499       __ it(ne);
3500       __ mov(ne, old_value_, old_value_temp_);
3501     }
3502     __ B(ne, GetExitLabel());
3503 
3504     // The old value we have read did not match `expected` (which is always a to-space
3505     // reference) but after the read barrier the marked to-space value matched, so the
3506     // old value must be a from-space reference to the same object. Do the same CAS loop
3507     // as the main path but check for both `expected` and the unmarked old value
3508     // representing the to-space and from-space references for the same object.
3509 
3510     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3511     vixl32::Register tmp_ptr = temps.Acquire();
3512 
3513     // Recalculate the `tmp_ptr` clobbered above.
3514     __ Add(tmp_ptr, base_, offset_);
3515 
3516     vixl32::Label mark_old_value;
3517     GenerateCompareAndSet(arm_codegen,
3518                           DataType::Type::kReference,
3519                           strong_,
3520                           /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3521                           /*cmp_failure_is_far_target=*/ success_.IsValid(),
3522                           tmp_ptr,
3523                           /*expected=*/ LocationFrom(expected_, old_value_),
3524                           /*new_value=*/ LocationFrom(new_value_),
3525                           /*old_value=*/ LocationFrom(old_value_temp_),
3526                           store_result_,
3527                           success_);
3528     if (!success_.IsValid()) {
3529       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3530       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3531       __ Mov(old_value_, expected_);
3532     }
3533 
3534     __ B(GetExitLabel());
3535 
3536     if (!success_.IsValid()) {
3537       __ Bind(&mark_old_value);
3538       if (kUseBakerReadBarrier) {
3539         DCHECK(update_old_value_slow_path_ == nullptr);
3540         arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3541       } else {
3542         // Note: We could redirect the `failure` above directly to the entry label and bind
3543         // the exit label in the main path, but the main path would need to access the
3544         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3545         DCHECK(update_old_value_slow_path_ != nullptr);
3546         __ B(update_old_value_slow_path_->GetEntryLabel());
3547         __ Bind(update_old_value_slow_path_->GetExitLabel());
3548       }
3549       __ B(GetExitLabel());
3550     }
3551   }
3552 
3553  private:
3554   bool strong_;
3555   vixl32::Register base_;
3556   vixl32::Register offset_;
3557   vixl32::Register expected_;
3558   vixl32::Register new_value_;
3559   vixl32::Register old_value_;
3560   vixl32::Register old_value_temp_;
3561   vixl32::Register store_result_;
3562   vixl32::Register success_;
3563   SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3564   SlowPathCodeARMVIXL* update_old_value_slow_path_;
3565 };
3566 
3567 static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3568   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
3569   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3570   LocationSummary* locations =
3571       new (allocator) LocationSummary(invoke,
3572                                       can_call
3573                                           ? LocationSummary::kCallOnSlowPath
3574                                           : LocationSummary::kNoCall,
3575                                       kIntrinsified);
3576   if (can_call && kUseBakerReadBarrier) {
3577     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3578   }
3579   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3580   locations->SetInAt(1, Location::RequiresRegister());
3581   locations->SetInAt(2, Location::RequiresRegister());
3582   locations->SetInAt(3, Location::RequiresRegister());
3583   locations->SetInAt(4, Location::RequiresRegister());
3584 
3585   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3586 
3587   // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3588   // this is also used for card-marking, and possibly for read barrier.
3589   locations->AddTemp(Location::RequiresRegister());
3590 }
3591 
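// Emit the Unsafe compare-and-set fast path. With read barriers enabled for references, a
// comparison failure branches to ReadBarrierCasSlowPathARMVIXL above instead of exiting the
// loop directly, so that an unmarked from-space reference to the same object does not make
// the CAS fail incorrectly.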
3592 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3593   DCHECK_NE(type, DataType::Type::kInt64);
3594 
3595   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3596   LocationSummary* locations = invoke->GetLocations();
3597 
3598   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
3599   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
3600   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
3601   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
3602   vixl32::Register new_value = InputRegisterAt(invoke, 4);            // New value.
3603 
3604   vixl32::Register tmp = RegisterFrom(locations->GetTemp(0));         // Temporary.
3605 
3606   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3607   vixl32::Register tmp_ptr = temps.Acquire();
3608 
3609   if (type == DataType::Type::kReference) {
3610     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3611     // object and scan the receiver at the next GC for nothing.
3612     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3613     codegen->MaybeMarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3614   }
3615 
3616   vixl32::Label exit_loop_label;
3617   vixl32::Label* exit_loop = &exit_loop_label;
3618   vixl32::Label* cmp_failure = &exit_loop_label;
3619 
3620   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3621     // If marking, check if the stored reference is a from-space reference to the same
3622     // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3623     ReadBarrierCasSlowPathARMVIXL* slow_path =
3624         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3625             invoke,
3626             /*strong=*/ true,
3627             base,
3628             offset,
3629             expected,
3630             new_value,
3631             /*old_value=*/ tmp,
3632             /*old_value_temp=*/ out,
3633             /*store_result=*/ out,
3634             /*success=*/ out,
3635             codegen);
3636     codegen->AddSlowPath(slow_path);
3637     exit_loop = slow_path->GetExitLabel();
3638     cmp_failure = slow_path->GetEntryLabel();
3639   }
3640 
3641   // Unsafe CAS operations have std::memory_order_seq_cst semantics.
3642   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3643   __ Add(tmp_ptr, base, offset);
3644   GenerateCompareAndSet(codegen,
3645                         type,
3646                         /*strong=*/ true,
3647                         cmp_failure,
3648                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3649                         tmp_ptr,
3650                         /*expected=*/ LocationFrom(expected),  // TODO: Int64
3651                         /*new_value=*/ LocationFrom(new_value),  // TODO: Int64
3652                         /*old_value=*/ LocationFrom(tmp),  // TODO: Int64
3653                         /*store_result=*/ tmp,
3654                         /*success=*/ out);
3655   __ Bind(exit_loop);
3656   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3657 
3658   if (type == DataType::Type::kReference) {
3659     codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3660   }
3661 }
3662 
3663 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3664   VisitJdkUnsafeCASInt(invoke);
3665 }
3666 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3667   VisitJdkUnsafeCASObject(invoke);
3668 }
3669 
3670 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3671   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3672   VisitJdkUnsafeCompareAndSetInt(invoke);
3673 }
3674 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3675   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3676   VisitJdkUnsafeCompareAndSetReference(invoke);
3677 }
3678 
3679 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3680   CreateUnsafeCASLocations(invoke, codegen_);
3681 }
3682 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3683   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3684   if (codegen_->EmitNonBakerReadBarrier()) {
3685     return;
3686   }
3687 
3688   CreateUnsafeCASLocations(invoke, codegen_);
3689 }
3690 
3691 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3692   VisitJdkUnsafeCASInt(invoke);
3693 }
3694 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3695   VisitJdkUnsafeCASObject(invoke);
3696 }
3697 
3698 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3699   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3700   VisitJdkUnsafeCompareAndSetInt(invoke);
3701 }
3702 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3703   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3704   VisitJdkUnsafeCompareAndSetReference(invoke);
3705 }
3706 
3707 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3708   GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3709 }
3710 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
3711   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3712   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3713 
3714   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3715 }
3716 
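// Operations implemented by GenerateGetAndUpdate() below. kAddWithByteSwap reverses the bytes of
// the loaded value before the addition and reverses the sum again before the store; it is
// presumably meant for accesses where the value is kept in memory in non-native byte order
// (byte array views).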
3717 enum class GetAndUpdateOp {
3718   kSet,
3719   kAdd,
3720   kAddWithByteSwap,
3721   kAnd,
3722   kOr,
3723   kXor
3724 };
3725 
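// The emitted code follows the usual exclusive-monitor retry loop; roughly (a sketch in the
// style of the comment in GenerateCompareAndSet(), not the literal emitted instructions):
//
//   do {
//     loaded_value = [ptr];                        // Load exclusive.
//     new_value = <op>(loaded_value, arg);         // Depends on `get_and_update_op`.
//     store_result = failed([ptr] <- new_value);   // Store exclusive.
//   } while (store_result);
//
// For floating-point kAdd, the loaded value is moved to FP registers for the VADD and the sum is
// moved back to core registers for the store-exclusive.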
3726 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3727                                  GetAndUpdateOp get_and_update_op,
3728                                  DataType::Type load_store_type,
3729                                  vixl32::Register ptr,
3730                                  Location arg,
3731                                  Location old_value,
3732                                  vixl32::Register store_result,
3733                                  Location maybe_temp,
3734                                  Location maybe_vreg_temp) {
3735   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3736 
3737   Location loaded_value;
3738   Location new_value;
3739   switch (get_and_update_op) {
3740     case GetAndUpdateOp::kSet:
3741       loaded_value = old_value;
3742       new_value = arg;
3743       break;
3744     case GetAndUpdateOp::kAddWithByteSwap:
3745       if (old_value.IsRegisterPair()) {
3746         // To avoid register overlap when reversing bytes, load into temps.
3747         DCHECK(maybe_temp.IsRegisterPair());
3748         loaded_value = maybe_temp;
3749         new_value = loaded_value;  // Use the same temporaries for the new value.
3750         break;
3751       }
3752       FALLTHROUGH_INTENDED;
3753     case GetAndUpdateOp::kAdd:
3754       if (old_value.IsFpuRegisterPair()) {
3755         DCHECK(maybe_temp.IsRegisterPair());
3756         loaded_value = maybe_temp;
3757         new_value = loaded_value;  // Use the same temporaries for the new value.
3758         break;
3759       }
3760       if (old_value.IsFpuRegister()) {
3761         DCHECK(maybe_temp.IsRegister());
3762         loaded_value = maybe_temp;
3763         new_value = loaded_value;  // Use the same temporary for the new value.
3764         break;
3765       }
3766       FALLTHROUGH_INTENDED;
3767     case GetAndUpdateOp::kAnd:
3768     case GetAndUpdateOp::kOr:
3769     case GetAndUpdateOp::kXor:
3770       loaded_value = old_value;
3771       new_value = maybe_temp;
3772       break;
3773   }
3774 
3775   vixl32::Label loop_label;
3776   __ Bind(&loop_label);
3777   EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3778   switch (get_and_update_op) {
3779     case GetAndUpdateOp::kSet:
3780       break;
3781     case GetAndUpdateOp::kAddWithByteSwap:
3782       if (arg.IsFpuRegisterPair()) {
3783         GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3784         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3785         __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3786         __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum);  // Swap low/high.
3787       } else if (arg.IsFpuRegister()) {
3788         GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3789         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3790         __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3791         __ Vmov(RegisterFrom(new_value), sum);
3792       } else if (load_store_type == DataType::Type::kInt64) {
3793         GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3794         // Swap low/high registers for the addition results.
3795         __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3796         __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3797       } else {
3798         GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3799         __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3800       }
3801       if (load_store_type == DataType::Type::kInt64) {
3802         // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3803         GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3804       } else {
3805         GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3806       }
3807       break;
3808     case GetAndUpdateOp::kAdd:
3809       if (arg.IsFpuRegisterPair()) {
3810         vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3811         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3812         __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3813         __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
3814         __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
3815       } else if (arg.IsFpuRegister()) {
3816         vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
3817         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3818         __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
3819         __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
3820         __ Vmov(RegisterFrom(new_value), sum);
3821       } else if (load_store_type == DataType::Type::kInt64) {
3822         __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3823         __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3824       } else {
3825         __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3826       }
3827       break;
3828     case GetAndUpdateOp::kAnd:
3829       if (load_store_type == DataType::Type::kInt64) {
3830         __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3831         __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3832       } else {
3833         __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3834       }
3835       break;
3836     case GetAndUpdateOp::kOr:
3837       if (load_store_type == DataType::Type::kInt64) {
3838         __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3839         __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3840       } else {
3841         __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3842       }
3843       break;
3844     case GetAndUpdateOp::kXor:
3845       if (load_store_type == DataType::Type::kInt64) {
3846         __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3847         __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3848       } else {
3849         __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3850       }
3851       break;
3852   }
3853   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
3854   __ Cmp(store_result, 0);
3855   __ B(ne, &loop_label);
3856 }
3857 
3858 static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
3859                                               CodeGeneratorARMVIXL* codegen,
3860                                               DataType::Type type,
3861                                               GetAndUpdateOp get_and_update_op) {
3862   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
3863   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3864   LocationSummary* locations =
3865       new (allocator) LocationSummary(invoke,
3866                                       can_call
3867                                           ? LocationSummary::kCallOnSlowPath
3868                                           : LocationSummary::kNoCall,
3869                                       kIntrinsified);
3870   if (can_call && kUseBakerReadBarrier) {
3871     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3872   }
3873   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3874   locations->SetInAt(1, Location::RequiresRegister());
3875   locations->SetInAt(2, Location::RequiresRegister());
3876   locations->SetInAt(3, Location::RequiresRegister());
3877 
3878   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3879 
3880   size_t num_temps = 1u;  // We always need `tmp_ptr`.
3881   if (get_and_update_op == GetAndUpdateOp::kAdd) {
3882     // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
3883     num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
3884     if (type == DataType::Type::kInt64) {
3885       // There are enough available registers but the register allocator can fail to allocate
3886       // them correctly because it can block register pairs by single-register inputs and temps.
3887       // To work around this limitation, use a fixed register pair for both the output and
3888       // the offset, which is not needed anymore after the address calculation.
3889       // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.)
3890       locations->SetInAt(2, LocationFrom(r0, r1));
3891       locations->UpdateOut(LocationFrom(r0, r1));
3892     }
3893   }
3894   locations->AddRegisterTemps(num_temps);
3895 }
3896 
3897 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
3898                                   CodeGeneratorARMVIXL* codegen,
3899                                   DataType::Type type,
3900                                   GetAndUpdateOp get_and_update_op) {
3901   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3902   LocationSummary* locations = invoke->GetLocations();
3903 
3904   Location out = locations->Out();                                  // Result.
3905   vixl32::Register base = InputRegisterAt(invoke, 1);               // Object pointer.
3906   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));    // Offset (discard high 4B).
3907   Location arg = locations->InAt(3);                                // New value or addend.
3908   vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));   // Pointer to actual memory.
3909   Location maybe_temp = Location::NoLocation();
3910   if (get_and_update_op == GetAndUpdateOp::kAdd) {
3911     maybe_temp = (type == DataType::Type::kInt64)
3912         ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
3913         : locations->GetTemp(1);
3914   }
3915 
3916   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3917   vixl32::Register temp = temps.Acquire();
3918 
3919   if (type == DataType::Type::kReference) {
3920     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3921     // Mark card for object as a new value shall be stored.
3922     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
3923     vixl32::Register card = tmp_ptr;  // Use the `tmp_ptr` also as the `card` temporary.
3924     codegen->MaybeMarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null);
3925   }
3926 
3927   // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring
3928   // a barrier before and after the raw load/store-exclusive operation.
3929 
3930   __ Add(tmp_ptr, base, Operand(offset));
3931   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3932   GenerateGetAndUpdate(codegen,
3933                        get_and_update_op,
3934                        type,
3935                        tmp_ptr,
3936                        arg,
3937                        /*old_value=*/ out,
3938                        /*store_result=*/ temp,
3939                        maybe_temp,
3940                        /*maybe_vreg_temp=*/ Location::NoLocation());
3941   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3942 
3943   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3944     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
3945     if (kUseBakerReadBarrier) {
3946       codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out));
3947     } else {
3948       codegen->GenerateReadBarrierSlow(
3949           invoke,
3950           out,
3951           out,
3952           Location::RegisterLocation(base.GetCode()),
3953           /*offset=*/ 0u,
3954           /*index=*/ Location::RegisterLocation(offset.GetCode()));
3955     }
3956   }
3957 }
3958 
3959 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3960   VisitJdkUnsafeGetAndAddInt(invoke);
3961 }
3962 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3963   VisitJdkUnsafeGetAndAddLong(invoke);
3964 }
3965 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3966   VisitJdkUnsafeGetAndSetInt(invoke);
3967 }
3968 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
3969   VisitJdkUnsafeGetAndSetLong(invoke);
3970 }
3971 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
3972   VisitJdkUnsafeGetAndSetReference(invoke);
3973 }
3974 
3975 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
3976   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
3977 }
3978 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
3979   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
3980 }
3981 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
3982   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
3983 }
3984 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
3985   CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
3986 }
3987 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
3988   CreateUnsafeGetAndUpdateLocations(
3989       invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
3990 }
3991 
3992 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
3993   VisitJdkUnsafeGetAndAddInt(invoke);
3994 }
3995 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
3996   VisitJdkUnsafeGetAndAddLong(invoke);
3997 }
3998 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
3999   VisitJdkUnsafeGetAndSetInt(invoke);
4000 }
4001 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
4002   VisitJdkUnsafeGetAndSetLong(invoke);
4003 }
4004 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
4005   VisitJdkUnsafeGetAndSetReference(invoke);
4006 }
4007 
4008 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
4009   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
4010 }
4011 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
4012   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
4013 }
4014 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
4015   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
4016 }
4017 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
4018   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
4019 }
4020 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
4021   GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
4022 }
4023 
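// Slow path shared by the VarHandle intrinsics. Besides the usual runtime call inherited from
// IntrinsicSlowPathARMVIXL, it can emit a separate fast path for byte array views (reached via
// GetByteArrayViewCheckLabel()); the setters below record which flavor of access is being
// compiled so that EmitByteArrayViewCode() can generate the matching code.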
4024 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
4025  public:
4026   VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
4027       : IntrinsicSlowPathARMVIXL(invoke),
4028         order_(order),
4029         atomic_(false),
4030         return_success_(false),
4031         strong_(false),
4032         get_and_update_op_(GetAndUpdateOp::kAdd) {
4033   }
4034 
4035   vixl32::Label* GetByteArrayViewCheckLabel() {
4036     return &byte_array_view_check_label_;
4037   }
4038 
4039   vixl32::Label* GetNativeByteOrderLabel() {
4040     return &native_byte_order_label_;
4041   }
4042 
4043   void SetAtomic(bool atomic) {
4044     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
4045            GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
4046     atomic_ = atomic;
4047   }
4048 
4049   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4050     if (return_success) {
4051       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4052     } else {
4053       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4054     }
4055     return_success_ = return_success;
4056     strong_ = strong;
4057   }
4058 
4059   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4060     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4061     get_and_update_op_ = get_and_update_op;
4062   }
4063 
4064   void EmitNativeCode(CodeGenerator* codegen_in) override {
4065     if (GetByteArrayViewCheckLabel()->IsReferenced()) {
4066       EmitByteArrayViewCode(codegen_in);
4067     }
4068     IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
4069   }
4070 
4071  private:
4072   HInvoke* GetInvoke() const {
4073     return GetInstruction()->AsInvoke();
4074   }
4075 
4076   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4077     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4078   }
4079 
4080   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4081 
4082   vixl32::Label byte_array_view_check_label_;
4083   vixl32::Label native_byte_order_label_;
4084   // Shared parameter for all VarHandle intrinsics.
4085   std::memory_order order_;
4086   // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
4087   bool atomic_;
4088   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4089   bool return_success_;
4090   bool strong_;
4091   // Extra argument for GenerateVarHandleGetAndUpdate().
4092   GetAndUpdateOp get_and_update_op_;
4093 };
4094 
4095 // Generate subtype check without read barriers.
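// The generated loop is roughly equivalent to (a sketch, not the emitted instructions):
//
//   klass = object->klass_;
//   while (klass != type) {
//     klass = klass->super_class_;
//     if (klass == null) goto slow_path;  // May be a false negative without a read barrier.
//   }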
4096 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
4097                                                     SlowPathCodeARMVIXL* slow_path,
4098                                                     vixl32::Register object,
4099                                                     vixl32::Register type,
4100                                                     bool object_can_be_null = true) {
4101   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4102 
4103   const MemberOffset class_offset = mirror::Object::ClassOffset();
4104   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4105 
4106   vixl32::Label success;
4107   if (object_can_be_null) {
4108     __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
4109   }
4110 
4111   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4112   vixl32::Register temp = temps.Acquire();
4113 
4114   __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
4115   assembler->MaybeUnpoisonHeapReference(temp);
4116   vixl32::Label loop;
4117   __ Bind(&loop);
4118   __ Cmp(type, temp);
4119   __ B(eq, &success, /*is_far_target=*/ false);
4120   __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
4121   assembler->MaybeUnpoisonHeapReference(temp);
4122   __ Cmp(temp, 0);
4123   __ B(eq, slow_path->GetEntryLabel());
4124   __ B(&loop);
4125   __ Bind(&success);
4126 }
4127 
4128 // Check access mode and the primitive type from VarHandle.varType.
4129 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4130 // check without read barrier, so it can have false negatives which we handle in the slow path.
4131 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4132                                                         CodeGeneratorARMVIXL* codegen,
4133                                                         SlowPathCodeARMVIXL* slow_path,
4134                                                         DataType::Type type) {
4135   mirror::VarHandle::AccessMode access_mode =
4136       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4137   Primitive::Type primitive_type = DataTypeToPrimitive(type);
4138 
4139   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4140   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4141 
4142   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4143   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4144   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4145 
4146   // Use the temporary register reserved for offset. It is not used yet at this point.
4147   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4148   vixl32::Register var_type_no_rb =
4149       RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
4150 
4151   // Check that the operation is permitted and check the primitive type of varhandle.varType.
4152   // We do not need a read barrier when loading a reference only to load a constant
4153   // primitive field through that reference. Use LDRD to load the two fields together.
4154   {
4155     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4156     vixl32::Register temp2 = temps.Acquire();
4157     DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4158     __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
4159     assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
4160     __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
4161     __ B(eq, slow_path->GetEntryLabel());
4162     __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4163     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4164     __ B(ne, slow_path->GetEntryLabel());
4165   }
4166 
4167   if (type == DataType::Type::kReference) {
4168     // Check reference arguments against the varType.
4169     // False negatives due to varType being an interface or array type
4170     // or due to the missing read barrier are handled by the slow path.
4171     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4172     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4173     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4174       HInstruction* arg = invoke->InputAt(arg_index);
4175       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4176       if (!arg->IsNullConstant()) {
4177         vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
4178         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4179       }
4180     }
4181   }
4182 }
4183 
4184 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4185                                               CodeGeneratorARMVIXL* codegen,
4186                                               SlowPathCodeARMVIXL* slow_path) {
4187   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4188   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4189 
4190   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4191 
4192   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4193   vixl32::Register temp = temps.Acquire();
4194 
4195   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4196   // Do not emit read barrier (or unpoison the reference) for comparing to null.
4197   __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4198   __ Cmp(temp, 0);
4199   __ B(ne, slow_path->GetEntryLabel());
4200 }
4201 
4202 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4203                                                  CodeGeneratorARMVIXL* codegen,
4204                                                  SlowPathCodeARMVIXL* slow_path) {
4205   VarHandleOptimizations optimizations(invoke);
4206   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4207   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4208   vixl32::Register object = InputRegisterAt(invoke, 1);
4209 
4210   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4211   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4212 
4213   // Null-check the object.
4214   if (!optimizations.GetSkipObjectNullCheck()) {
4215     __ Cmp(object, 0);
4216     __ B(eq, slow_path->GetEntryLabel());
4217   }
4218 
4219   if (!optimizations.GetUseKnownImageVarHandle()) {
4220     // Use the first temporary register, whether it's for the declaring class or the offset.
4221     // It is not used yet at this point.
4222     vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4223 
4224     // Check that the VarHandle references an instance field by checking that
4225     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4226     // type compatibility check with the source object's type, which will fail for null.
4227     {
4228       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4229       vixl32::Register temp2 = temps.Acquire();
4230       DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4231       __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4232       assembler->MaybeUnpoisonHeapReference(temp);
4233       // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4234       __ Cmp(temp2, 0);
4235       __ B(ne, slow_path->GetEntryLabel());
4236     }
4237 
4238     // Check that the object has the correct type.
4239     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4240     GenerateSubTypeObjectCheckNoReadBarrier(
4241         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4242   }
4243 }
4244 
4245 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4246                                          CodeGeneratorARMVIXL* codegen,
4247                                          VarHandleSlowPathARMVIXL* slow_path) {
4248   VarHandleOptimizations optimizations(invoke);
4249   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4250   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4251   vixl32::Register object = InputRegisterAt(invoke, 1);
4252   vixl32::Register index = InputRegisterAt(invoke, 2);
4253   DataType::Type value_type =
4254       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4255   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4256 
4257   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4258   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4259   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4260   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4261   const MemberOffset class_offset = mirror::Object::ClassOffset();
4262   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4263 
4264   // Null-check the object.
4265   if (!optimizations.GetSkipObjectNullCheck()) {
4266     __ Cmp(object, 0);
4267     __ B(eq, slow_path->GetEntryLabel());
4268   }
4269 
4270   // Use the offset temporary register. It is not used yet at this point.
4271   vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4272 
4273   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4274   vixl32::Register temp2 = temps.Acquire();
4275 
4276   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4277   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4278   // coordinateType0 shall not be null but we do not explicitly verify that.
4279   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4280   __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4281   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4282   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4283   __ Cmp(temp2, 0);
4284   __ B(eq, slow_path->GetEntryLabel());
4285 
4286   // Check object class against componentType0.
4287   //
4288   // This is an exact check and we defer other cases to the runtime. This includes
4289   // conversion to array of superclass references, which is valid but subsequently
4290   // requires all update operations to check that the value can indeed be stored.
4291   // We do not want to perform such extra checks in the intrinsified code.
4292   //
4293   // We do this check without read barrier, so there can be false negatives which we
4294   // defer to the slow path. There shall be no false negatives for array classes in the
4295   // boot image (including Object[] and primitive arrays) because they are non-movable.
4296   __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
4297   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4298   __ Cmp(temp, temp2);
4299   __ B(ne, slow_path->GetEntryLabel());
4300 
4301   // Check that the coordinateType0 is an array type. We do not need a read barrier
4302   // for loading constant reference fields (or chains of them) for comparison with null,
4303   // nor for finally loading a constant primitive field (primitive type) below.
4304   __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
4305   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4306   __ Cmp(temp2, 0);
4307   __ B(eq, slow_path->GetEntryLabel());
4308 
4309   // Check that the array component type matches the primitive type.
4310   // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4311   // we shall check for a byte array view in the slow path.
4312   // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4313   // so we cannot emit that if we're JITting without boot image.
4314   bool boot_image_available =
4315       codegen->GetCompilerOptions().IsBootImage() ||
4316       !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4317   bool can_be_view =
4318       ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4319       boot_image_available;
4320   vixl32::Label* slow_path_label =
4321       can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4322   __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4323   __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4324   __ B(ne, slow_path_label);
4325 
4326   // Check for array index out of bounds.
4327   __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4328   __ Cmp(index, temp);
4329   __ B(hs, slow_path->GetEntryLabel());
4330 }
4331 
4332 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4333                                               CodeGeneratorARMVIXL* codegen,
4334                                               VarHandleSlowPathARMVIXL* slow_path) {
4335   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4336   if (expected_coordinates_count == 0u) {
4337     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4338   } else if (expected_coordinates_count == 1u) {
4339     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4340   } else {
4341     DCHECK_EQ(expected_coordinates_count, 2u);
4342     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4343   }
4344 }
4345 
4346 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4347                                                          CodeGeneratorARMVIXL* codegen,
4348                                                          std::memory_order order,
4349                                                          DataType::Type type) {
4350   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4351   VarHandleOptimizations optimizations(invoke);
4352   if (optimizations.GetUseKnownImageVarHandle()) {
4353     DCHECK_NE(expected_coordinates_count, 2u);
4354     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4355       return nullptr;
4356     }
4357   }
4358 
4359   VarHandleSlowPathARMVIXL* slow_path =
4360       new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4361   codegen->AddSlowPath(slow_path);
4362 
4363   if (!optimizations.GetUseKnownImageVarHandle()) {
4364     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4365   }
4366   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4367 
4368   return slow_path;
4369 }
4370 
4371 struct VarHandleTarget {
4372   vixl32::Register object;  // The object holding the value to operate on.
4373   vixl32::Register offset;  // The offset of the value to operate on.
4374 };
4375 
4376 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4377   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4378   LocationSummary* locations = invoke->GetLocations();
4379 
4380   VarHandleTarget target;
4381   // The temporary allocated for loading the offset.
4382   target.offset = RegisterFrom(locations->GetTemp(0u));
4383   // The reference to the object that holds the value to operate on.
4384   target.object = (expected_coordinates_count == 0u)
4385       ? RegisterFrom(locations->GetTemp(1u))
4386       : InputRegisterAt(invoke, 1);
4387   return target;
4388 }
4389 
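// Fill in the VarHandleTarget: for fields, load the ArtField* from the VarHandle and read the
// field offset (and, for static fields, the declaring class into `target.object`); for arrays
// and byte array views, compute `target.offset` as data_offset + (index << size_shift).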
4390 static void GenerateVarHandleTarget(HInvoke* invoke,
4391                                     const VarHandleTarget& target,
4392                                     CodeGeneratorARMVIXL* codegen) {
4393   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4394   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4395   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4396 
4397   if (expected_coordinates_count <= 1u) {
4398     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4399       ScopedObjectAccess soa(Thread::Current());
4400       ArtField* target_field = GetBootImageVarHandleField(invoke);
4401       if (expected_coordinates_count == 0u) {
4402         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4403         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4404           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4405           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4406         } else {
4407           codegen->LoadTypeForBootImageIntrinsic(
4408               target.object,
4409               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4410         }
4411       }
4412       __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4413     } else {
4414       // For static fields, we need to fill the `target.object` with the declaring class,
4415       // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
4416       // we do not need the declaring class, so we can forget the `ArtField*` when
4417       // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
4418       vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4419 
4420       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4421       const MemberOffset offset_offset = ArtField::OffsetOffset();
4422 
4423       // Load the ArtField*, the offset and, if needed, declaring class.
4424       __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
4425       __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
4426       if (expected_coordinates_count == 0u) {
4427         codegen->GenerateGcRootFieldLoad(invoke,
4428                                          LocationFrom(target.object),
4429                                          field,
4430                                          ArtField::DeclaringClassOffset().Int32Value(),
4431                                          codegen->GetCompilerReadBarrierOption());
4432       }
4433     }
4434   } else {
4435     DCHECK_EQ(expected_coordinates_count, 2u);
4436     DataType::Type value_type =
4437         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4438     uint32_t size_shift = DataType::SizeShift(value_type);
4439     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4440 
4441     vixl32::Register index = InputRegisterAt(invoke, 2);
4442     vixl32::Register shifted_index = index;
4443     if (size_shift != 0u) {
4444       shifted_index = target.offset;
4445       __ Lsl(shifted_index, index, size_shift);
4446     }
4447     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4448   }
4449 }
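// Illustrative sketch of the array element path above (comment only, not emitted code): the
// computed offset is
//     target.offset = mirror::Array::DataOffset(element_size) + (index << size_shift)
// so, for a hypothetical int[] element VarHandle with a 12-byte data offset, index 3 resolves
// to 12 + (3 << 2) = 24 bytes from `target.object`.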
4450 
4451 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
4452                                                        CodeGeneratorARMVIXL* codegen) {
4453   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4454   DataType::Type return_type = invoke->GetType();
4455 
4456   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4457   LocationSummary* locations =
4458       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4459   locations->SetInAt(0, Location::RequiresRegister());
4460   // Require coordinates in registers. These are the object holding the value
4461   // to operate on (except for static fields) and index (for arrays and views).
4462   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4463     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4464   }
4465   if (return_type != DataType::Type::kVoid) {
4466     if (DataType::IsFloatingPointType(return_type)) {
4467       locations->SetOut(Location::RequiresFpuRegister());
4468     } else {
4469       locations->SetOut(Location::RequiresRegister());
4470     }
4471   }
4472   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4473   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4474   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4475     HInstruction* arg = invoke->InputAt(arg_index);
4476     if (DataType::IsFloatingPointType(arg->GetType())) {
4477       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4478     } else {
4479       locations->SetInAt(arg_index, Location::RequiresRegister());
4480     }
4481   }
4482 
4483   // Add a temporary for offset.
4484   if (codegen->EmitNonBakerReadBarrier() &&
4485       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4486     // To preserve the offset value across the non-Baker read barrier slow path
4487     // for loading the declaring class, use a fixed callee-save register.
4488     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4489     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4490   } else {
4491     locations->AddTemp(Location::RequiresRegister());
4492   }
4493   if (expected_coordinates_count == 0u) {
4494     // Add a temporary to hold the declaring class.
4495     locations->AddTemp(Location::RequiresRegister());
4496   }
4497 
4498   return locations;
4499 }
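// The locations above always make temp 0 the offset temporary and, for static fields, temp 1
// the declaring class temporary; GetVarHandleTarget() relies on exactly this layout. The fixed
// callee-save register used for the non-Baker read barrier case is whichever register
// CTZ(kArmCalleeSaveRefSpills) selects (presumably r5 with the usual ARM runtime frame setup).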
4500 
4501 static void CreateVarHandleGetLocations(HInvoke* invoke,
4502                                         CodeGeneratorARMVIXL* codegen,
4503                                         bool atomic) {
4504   VarHandleOptimizations optimizations(invoke);
4505   if (optimizations.GetDoNotIntrinsify()) {
4506     return;
4507   }
4508 
4509   if (codegen->EmitNonBakerReadBarrier() &&
4510       invoke->GetType() == DataType::Type::kReference &&
4511       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4512       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4513     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4514     // the passed reference and reloads it from the field. This gets the memory visibility
4515     // wrong for Acquire/Volatile operations. b/173104084
4516     return;
4517   }
4518 
4519   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4520 
4521   DataType::Type type = invoke->GetType();
4522   if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4523     // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4524     // declaring class (if present) and offset temporary.
4525     DCHECK_EQ(locations->GetTempCount(),
4526               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4527     locations->AddRegisterTemps(3u - locations->GetTempCount());
4528   }
4529 }
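// Note: Use64BitExclusiveLoadStore() presumably means that atomic Int64/Float64 accesses are
// emitted as LDREXD/STREXD pairs rather than a single LDRD/STRD, which is why the extra core
// temporaries are reserved here; GenerateVarHandleGet() below also reuses temps 1 and 2 as the
// integer registers holding a byte-swapped Float64 load before it is moved to the output.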
4530 
4531 static void GenerateVarHandleGet(HInvoke* invoke,
4532                                  CodeGeneratorARMVIXL* codegen,
4533                                  std::memory_order order,
4534                                  bool atomic,
4535                                  bool byte_swap = false) {
4536   DataType::Type type = invoke->GetType();
4537   DCHECK_NE(type, DataType::Type::kVoid);
4538 
4539   LocationSummary* locations = invoke->GetLocations();
4540   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4541   Location out = locations->Out();
4542 
4543   VarHandleTarget target = GetVarHandleTarget(invoke);
4544   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4545   if (!byte_swap) {
4546     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4547     GenerateVarHandleTarget(invoke, target, codegen);
4548     if (slow_path != nullptr) {
4549       slow_path->SetAtomic(atomic);
4550       __ Bind(slow_path->GetNativeByteOrderLabel());
4551     }
4552   }
4553 
4554   Location maybe_temp = Location::NoLocation();
4555   Location maybe_temp2 = Location::NoLocation();
4556   Location maybe_temp3 = Location::NoLocation();
4557   if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
4558     // Reuse the offset temporary.
4559     maybe_temp = LocationFrom(target.offset);
4560   } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4561     // Reuse the offset temporary and declaring class (if present).
4562     // The address shall be constructed in the scratch register before they are clobbered.
4563     maybe_temp = LocationFrom(target.offset);
4564     DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4565     if (type == DataType::Type::kFloat64) {
4566       maybe_temp2 = locations->GetTemp(1);
4567       maybe_temp3 = locations->GetTemp(2);
4568     }
4569   }
4570 
4571   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4572   Location loaded_value = out;
4573   DataType::Type load_type = type;
4574   if (byte_swap) {
4575     if (type == DataType::Type::kFloat64) {
4576       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4577         // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4578         loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4579         maybe_temp2 = Location::NoLocation();
4580         maybe_temp3 = Location::NoLocation();
4581       } else {
4582         // Use the offset temporary and the scratch register.
4583         loaded_value = LocationFrom(target.offset, temps.Acquire());
4584       }
4585       load_type = DataType::Type::kInt64;
4586     } else if (type == DataType::Type::kFloat32) {
4587       // Reuse the offset temporary.
4588       loaded_value = LocationFrom(target.offset);
4589       load_type = DataType::Type::kInt32;
4590     } else if (type == DataType::Type::kInt64) {
4591       // Swap the high and low registers and reverse the bytes in each after the load.
4592       loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4593     }
4594   }
4595 
4596   GenerateIntrinsicGet(invoke,
4597                        codegen,
4598                        load_type,
4599                        order,
4600                        atomic,
4601                        target.object,
4602                        target.offset,
4603                        loaded_value,
4604                        maybe_temp,
4605                        maybe_temp2,
4606                        maybe_temp3);
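  // When byte-swapping an Int64 result, the full reversal is decomposed into swapping the two
  // words (already done above by loading into a swapped register pair) and reversing the bytes
  // within each word below. Worked example (comment only): for 0x0011223344556677 the swapped
  // load leaves lo=0x00112233/hi=0x44556677 and the per-word reversal then yields
  // lo=0x33221100/hi=0x77665544, i.e. the fully byte-reversed value 0x7766554433221100.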
4607   if (byte_swap) {
4608     if (type == DataType::Type::kInt64) {
4609       GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4610     } else {
4611       GenerateReverseBytes(assembler, type, loaded_value, out);
4612     }
4613   }
4614 
4615   if (slow_path != nullptr) {
4616     DCHECK(!byte_swap);
4617     __ Bind(slow_path->GetExitLabel());
4618   }
4619 }
4620 
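// The accessors below map the VarHandle access modes onto (memory order, atomicity) pairs:
// get uses a relaxed non-atomic load, getOpaque a relaxed atomic load, getAcquire an acquire
// atomic load and getVolatile a sequentially consistent atomic load.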
4621 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4622   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4623 }
4624 
4625 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4626   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4627 }
4628 
4629 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4630   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4631 }
4632 
4633 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4634   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4635 }
4636 
4637 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4638   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4639 }
4640 
4641 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4642   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4643 }
4644 
4645 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4646   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4647 }
4648 
4649 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4650   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4651 }
4652 
4653 static void CreateVarHandleSetLocations(HInvoke* invoke,
4654                                         CodeGeneratorARMVIXL* codegen,
4655                                         bool atomic) {
4656   VarHandleOptimizations optimizations(invoke);
4657   if (optimizations.GetDoNotIntrinsify()) {
4658     return;
4659   }
4660 
4661   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4662 
4663   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4664   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4665   if (DataType::Is64BitType(value_type)) {
4666     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4667     DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4668     HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4669     bool has_reverse_bytes_slow_path =
4670         (expected_coordinates_count == 2u) &&
4671         !IsZeroBitPattern(arg);
4672     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4673       // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4674       // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4675       // We can reuse the offset temporary and declaring class (if present).
4676       size_t temps_needed = has_reverse_bytes_slow_path
4677           ? 4u
4678           : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4679       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4680     } else if (has_reverse_bytes_slow_path) {
4681       // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4682       // We can reuse the offset temporary.
4683       DCHECK_EQ(locations->GetTempCount(), 1u);
4684       locations->AddTemp(Location::RequiresRegister());
4685     }
4686   }
4687 }
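// Note: a value that is a statically known zero bit pattern is unchanged by byte reversal, so
// no byte-reversed copy (and hence none of the extra temporaries above) is needed for it in the
// byte array view slow path.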
4688 
4689 static void GenerateVarHandleSet(HInvoke* invoke,
4690                                  CodeGeneratorARMVIXL* codegen,
4691                                  std::memory_order order,
4692                                  bool atomic,
4693                                  bool byte_swap = false) {
4694   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4695   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4696 
4697   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4698   LocationSummary* locations = invoke->GetLocations();
4699   Location value = locations->InAt(value_index);
4700 
4701   VarHandleTarget target = GetVarHandleTarget(invoke);
4702   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4703   if (!byte_swap) {
4704     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4705     GenerateVarHandleTarget(invoke, target, codegen);
4706     if (slow_path != nullptr) {
4707       slow_path->SetAtomic(atomic);
4708       __ Bind(slow_path->GetNativeByteOrderLabel());
4709     }
4710   }
4711 
4712   Location maybe_temp = Location::NoLocation();
4713   Location maybe_temp2 = Location::NoLocation();
4714   Location maybe_temp3 = Location::NoLocation();
4715   if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4716     // Reuse the offset temporary and declaring class (if present).
4717     // The address shall be constructed in the scratch register before they are clobbered.
4718     maybe_temp = locations->GetTemp(0);
4719     maybe_temp2 = locations->GetTemp(1);
4720     if (value_type == DataType::Type::kFloat64) {
4721       maybe_temp3 = locations->GetTemp(2);
4722     }
4723   }
4724 
4725   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4726   if (byte_swap) {
4727     if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4728       // Calculate the address in scratch register, so that we can use the offset temporary.
4729       vixl32::Register base = temps.Acquire();
4730       __ Add(base, target.object, target.offset);
4731       target.object = base;
4732       target.offset = vixl32::Register();
4733     }
4734     Location original_value = value;
4735     if (DataType::Is64BitType(value_type)) {
4736       size_t temp_start = 0u;
4737       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4738         // Clear `maybe_temp3` which was initialized above for Float64.
4739         DCHECK_IMPLIES(value_type == DataType::Type::kFloat64,
4740                        maybe_temp3.Equals(locations->GetTemp(2)));
4741         maybe_temp3 = Location::NoLocation();
4742         temp_start = 2u;
4743       }
4744       value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4745                            RegisterFrom(locations->GetTemp(temp_start + 1u)));
4746       if (value_type == DataType::Type::kFloat64) {
4747         __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4748         GenerateReverseBytesInPlaceForEachWord(assembler, value);
4749         value_type = DataType::Type::kInt64;
4750       } else {
4751         GenerateReverseBytes(assembler, value_type, original_value, value);
4752       }
4753     } else if (value_type == DataType::Type::kFloat32) {
4754       value = locations->GetTemp(0);  // Use the offset temporary which was freed above.
4755       __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4756       GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4757       value_type = DataType::Type::kInt32;
4758     } else {
4759       value = LocationFrom(temps.Acquire());
4760       GenerateReverseBytes(assembler, value_type, original_value, value);
4761     }
4762   }
4763 
4764   GenerateIntrinsicSet(codegen,
4765                        value_type,
4766                        order,
4767                        atomic,
4768                        target.object,
4769                        target.offset,
4770                        value,
4771                        maybe_temp,
4772                        maybe_temp2,
4773                        maybe_temp3);
4774 
4775   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4776     // Reuse the offset temporary for MarkGCCard.
4777     vixl32::Register temp = target.offset;
4778     vixl32::Register card = temps.Acquire();
4779     vixl32::Register value_reg = RegisterFrom(value);
4780     codegen->MaybeMarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
4781   }
4782 
4783   if (slow_path != nullptr) {
4784     DCHECK(!byte_swap);
4785     __ Bind(slow_path->GetExitLabel());
4786   }
4787 }
4788 
4789 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4790   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4791 }
4792 
4793 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4794   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4795 }
4796 
4797 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4798   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4799 }
4800 
4801 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4802   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4803 }
4804 
4805 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4806   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4807 }
4808 
4809 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4810   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
4811 }
4812 
4813 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4814   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4815 }
4816 
4817 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4818   // ARM store-release instructions are implicitly sequentially consistent.
4819   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4820 }
4821 
4822 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4823                                                             CodeGeneratorARMVIXL* codegen,
4824                                                             bool return_success) {
4825   VarHandleOptimizations optimizations(invoke);
4826   if (optimizations.GetDoNotIntrinsify()) {
4827     return;
4828   }
4829 
4830   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4831   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4832   if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
4833     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4834     // the passed reference and reloads it from the field. This breaks the read barriers
4835     // in slow path in different ways. The marked old value may not actually be a to-space
4836     // reference to the same object as `old_value`, breaking slow path assumptions. And
4837     // for CompareAndExchange, marking the old value after comparison failure may actually
4838     // return the reference to `expected`, erroneously indicating success even though we
4839     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4840     return;
4841   }
4842 
4843   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
4844 
4845   if (codegen->EmitNonBakerReadBarrier()) {
4846     // We need callee-save registers for both the class object and offset instead of
4847     // the temporaries reserved in CreateVarHandleCommonLocations().
4848     static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
4849     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4850     constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
4851     if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4852       DCHECK_EQ(locations->GetTempCount(), 2u);
4853       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4854       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4855       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4856     } else {
4857       DCHECK_EQ(locations->GetTempCount(), 1u);
4858       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4859       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4860     }
4861   }
4862 
4863   if (DataType::IsFloatingPointType(value_type)) {
4864     // We can reuse the declaring class (if present) and offset temporary.
4865     DCHECK_EQ(locations->GetTempCount(),
4866               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4867     size_t temps_needed = (value_type == DataType::Type::kFloat64)
4868         ? (return_success ? 5u : 7u)
4869         : (return_success ? 3u : 4u);
4870     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4871   } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4872     // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
4873     DCHECK_EQ(locations->GetTempCount(), 1u);
4874     if (value_type == DataType::Type::kInt64) {
4875       // We would ideally add 4 temps for Int64 but that would simply run out of registers,
4876       // so we instead need to reverse bytes in actual arguments and undo it at the end.
4877     } else {
4878       locations->AddRegisterTemps(2u);
4879     }
4880   }
4881   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
4882     // Add a temporary for store result, also used for the `old_value_temp` in slow path.
4883     locations->AddTemp(Location::RequiresRegister());
4884   }
4885 }
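// The floating point temp counts above follow the register assignment in
// GenerateVarHandleCompareAndSetOrExchange() below: for Float64, two registers each for
// `expected` and `new_value`, one for the store result and, when the old value is returned,
// two more to hold it (5 or 7 in total); for Float32 the same roles take one register each
// (3 or 4 in total).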
4886 
4887 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4888                                                      CodeGeneratorARMVIXL* codegen,
4889                                                      std::memory_order order,
4890                                                      bool return_success,
4891                                                      bool strong,
4892                                                      bool byte_swap = false) {
4893   DCHECK(return_success || strong);
4894 
4895   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4896   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4897   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4898   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4899 
4900   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4901   LocationSummary* locations = invoke->GetLocations();
4902   Location expected = locations->InAt(expected_index);
4903   Location new_value = locations->InAt(new_value_index);
4904   Location out = locations->Out();
4905 
4906   VarHandleTarget target = GetVarHandleTarget(invoke);
4907   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4908   if (!byte_swap) {
4909     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4910     GenerateVarHandleTarget(invoke, target, codegen);
4911     if (slow_path != nullptr) {
4912       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4913       __ Bind(slow_path->GetNativeByteOrderLabel());
4914     }
4915   }
4916 
4917   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
4918   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
4919   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
4920   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
4921 
4922   if (release_barrier) {
4923     codegen->GenerateMemoryBarrier(
4924         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
4925   }
4926 
4927   // Calculate the pointer to the value.
4928   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4929   vixl32::Register tmp_ptr = temps.Acquire();
4930   __ Add(tmp_ptr, target.object, target.offset);
4931 
4932   // Move floating point values to temporaries and prepare output registers.
4933   // Note that float/double CAS uses a bitwise comparison, rather than operator==.
4934   // Reuse the declaring class (if present) and offset temporary for non-reference types;
4935   // the address has already been constructed in the scratch register. We are more careful
4936   // with references because of the read and write barriers; see below.
4937   Location old_value;
4938   vixl32::Register store_result;
4939   vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
4940   DataType::Type cas_type = value_type;
4941   if (value_type == DataType::Type::kFloat64) {
4942     vixl32::DRegister expected_vreg = DRegisterFrom(expected);
4943     vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
4944     expected =
4945         LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
4946     new_value =
4947         LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
4948     store_result = RegisterFrom(locations->GetTemp(4));
4949     old_value = return_success
4950         ? LocationFrom(success, store_result)
4951         : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
4952     if (byte_swap) {
4953       __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
4954       __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
4955       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4956       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4957     } else {
4958       __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
4959       __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
4960     }
4961     cas_type = DataType::Type::kInt64;
4962   } else if (value_type == DataType::Type::kFloat32) {
4963     vixl32::SRegister expected_vreg = SRegisterFrom(expected);
4964     vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
4965     expected = locations->GetTemp(0);
4966     new_value = locations->GetTemp(1);
4967     store_result = RegisterFrom(locations->GetTemp(2));
4968     old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
4969     __ Vmov(RegisterFrom(expected), expected_vreg);
4970     __ Vmov(RegisterFrom(new_value), new_value_vreg);
4971     if (byte_swap) {
4972       GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
4973       GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
4974     }
4975     cas_type = DataType::Type::kInt32;
4976   } else if (value_type == DataType::Type::kInt64) {
4977     store_result = RegisterFrom(locations->GetTemp(0));
4978     old_value = return_success
4979         ? LocationFrom(success, store_result)
4980         // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
4981         : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
4982     if (byte_swap) {
4983       // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
4984       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4985       expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
4986       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4987       new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
4988     }
4989   } else {
4990     // Use the last temp. For references with read barriers, this is an extra temporary
4991     // allocated to avoid overwriting the temporaries for declaring class (if present)
4992     // and offset as they are needed in the slow path. Otherwise, this is the offset
4993     // temporary which also works for references without read barriers that need the
4994     // object register preserved for the write barrier.
4995     store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
4996     old_value = return_success ? LocationFrom(store_result) : out;
4997     if (byte_swap) {
4998       DCHECK_EQ(locations->GetTempCount(), 3u);
4999       Location original_expected = expected;
5000       Location original_new_value = new_value;
5001       expected = locations->GetTemp(0);
5002       new_value = locations->GetTemp(1);
5003       GenerateReverseBytes(assembler, value_type, original_expected, expected);
5004       GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
5005     }
5006   }
5007 
5008   vixl32::Label exit_loop_label;
5009   vixl32::Label* exit_loop = &exit_loop_label;
5010   vixl32::Label* cmp_failure = &exit_loop_label;
5011 
5012   if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5013     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5014     // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
5015     vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
5016     // The slow path store result must not clobber `old_value`.
5017     vixl32::Register slow_path_store_result = old_value_temp;
5018     ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
5019         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
5020             invoke,
5021             strong,
5022             target.object,
5023             target.offset,
5024             RegisterFrom(expected),
5025             RegisterFrom(new_value),
5026             RegisterFrom(old_value),
5027             old_value_temp,
5028             slow_path_store_result,
5029             success,
5030             codegen);
5031     codegen->AddSlowPath(rb_slow_path);
5032     exit_loop = rb_slow_path->GetExitLabel();
5033     cmp_failure = rb_slow_path->GetEntryLabel();
5034   }
5035 
5036   GenerateCompareAndSet(codegen,
5037                         cas_type,
5038                         strong,
5039                         cmp_failure,
5040                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
5041                         tmp_ptr,
5042                         expected,
5043                         new_value,
5044                         old_value,
5045                         store_result,
5046                         success);
5047   __ Bind(exit_loop);
5048 
5049   if (acquire_barrier) {
5050     codegen->GenerateMemoryBarrier(
5051         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5052   }
5053 
5054   if (byte_swap && value_type == DataType::Type::kInt64) {
5055     // Undo the byte swapping in `expected` and `new_value`. We do not know
5056     // whether the values in these registers will be needed later.
5057     GenerateReverseBytesInPlaceForEachWord(assembler, expected);
5058     GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
5059   }
5060   if (!return_success) {
5061     if (byte_swap) {
5062       if (value_type == DataType::Type::kInt64) {
5063         GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5064       } else {
5065         GenerateReverseBytes(assembler, value_type, old_value, out);
5066       }
5067     } else if (value_type == DataType::Type::kFloat64) {
5068       __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5069     } else if (value_type == DataType::Type::kFloat32) {
5070       __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5071     }
5072   }
5073 
5074   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5075     // Reuse the offset temporary and scratch register for MarkGCCard.
5076     vixl32::Register temp = target.offset;
5077     vixl32::Register card = tmp_ptr;
5078     // Mark card for object assuming new value is stored.
5079     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5080     codegen->MaybeMarkGCCard(
5081         temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
5082   }
5083 
5084   if (slow_path != nullptr) {
5085     DCHECK(!byte_swap);
5086     __ Bind(slow_path->GetExitLabel());
5087   }
5088 }
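// Rough sketch (comment only; the actual code is emitted by GenerateCompareAndSet() and differs
// in detail) of the 32-bit strong CAS loop generated above:
//   retry:
//     ldrex   old_value, [tmp_ptr]
//     cmp     old_value, expected
//     bne     cmp_failure
//     strex   store_result, new_value, [tmp_ptr]
//     cmp     store_result, #0
//     bne     retry
// A weak CAS does not retry when the STREX fails.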
5089 
5090 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5091   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5092 }
5093 
5094 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5095   GenerateVarHandleCompareAndSetOrExchange(
5096       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5097 }
5098 
5099 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5100   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5101 }
5102 
5103 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5104   GenerateVarHandleCompareAndSetOrExchange(
5105       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5106 }
5107 
5108 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5109   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
5110 }
5111 
5112 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5113   GenerateVarHandleCompareAndSetOrExchange(
5114       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5115 }
5116 
5117 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5118   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5119 }
5120 
5121 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5122   GenerateVarHandleCompareAndSetOrExchange(
5123       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5124 }
5125 
5126 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5127   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5128 }
5129 
5130 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5131   GenerateVarHandleCompareAndSetOrExchange(
5132       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5133 }
5134 
5135 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5136   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5137 }
5138 
5139 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5140   GenerateVarHandleCompareAndSetOrExchange(
5141       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5142 }
5143 
5144 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5145   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5146 }
5147 
5148 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5149   GenerateVarHandleCompareAndSetOrExchange(
5150       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5151 }
5152 
5153 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5154   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
5155 }
5156 
5157 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5158   GenerateVarHandleCompareAndSetOrExchange(
5159       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5160 }
5161 
5162 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5163                                                  CodeGeneratorARMVIXL* codegen,
5164                                                  GetAndUpdateOp get_and_update_op) {
5165   VarHandleOptimizations optimizations(invoke);
5166   if (optimizations.GetDoNotIntrinsify()) {
5167     return;
5168   }
5169 
5170   if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5171     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5172     // the passed reference and reloads it from the field, thus seeing the new value
5173     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
5174     return;
5175   }
5176 
5177   LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
5178 
5179   // We can reuse the declaring class (if present) and offset temporary, except for
5180   // non-Baker read barriers that need them for the slow path.
5181   DCHECK_EQ(locations->GetTempCount(),
5182             (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5183 
5184   DataType::Type value_type = invoke->GetType();
5185   if (get_and_update_op == GetAndUpdateOp::kSet) {
5186     if (DataType::IsFloatingPointType(value_type)) {
5187       // Add temps needed to do the GenerateGetAndUpdate() with core registers.
5188       size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
5189       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5190     } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
5191       // We need to preserve the declaring class (if present) and offset for read barrier
5192       // slow paths, so we must use a separate temporary for the exclusive store result.
5193       locations->AddTemp(Location::RequiresRegister());
5194     } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5195       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5196       DCHECK_EQ(locations->GetTempCount(), 1u);
5197       locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
5198     }
5199   } else {
5200     // We need temporaries for the new value and exclusive store result.
5201     size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
5202     if (get_and_update_op != GetAndUpdateOp::kAdd &&
5203         GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5204       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5205       if (value_type == DataType::Type::kInt64) {
5206         // We would ideally add 2 temps for Int64 but that would simply run out of registers,
5207         // so we instead need to reverse bytes in the actual argument and undo it at the end.
5208       } else {
5209         temps_needed += 1u;
5210       }
5211     }
5212     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5213     if (DataType::IsFloatingPointType(value_type)) {
5214       // Note: This shall allocate a D register. There is no way to request an S register.
5215       locations->AddTemp(Location::RequiresFpuRegister());
5216     }
5217   }
5218 }
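// Note: GetAndAdd does not reserve byte-reversal temporaries above because, for the byte array
// view slow path, GenerateVarHandleGetAndUpdate() below switches to
// GetAndUpdateOp::kAddWithByteSwap and performs the byte reversal inside the update loop itself.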
5219 
5220 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5221                                           CodeGeneratorARMVIXL* codegen,
5222                                           GetAndUpdateOp get_and_update_op,
5223                                           std::memory_order order,
5224                                           bool byte_swap = false) {
5225   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5226   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5227 
5228   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5229   LocationSummary* locations = invoke->GetLocations();
5230   Location arg = locations->InAt(arg_index);
5231   Location out = locations->Out();
5232 
5233   VarHandleTarget target = GetVarHandleTarget(invoke);
5234   VarHandleSlowPathARMVIXL* slow_path = nullptr;
5235   if (!byte_swap) {
5236     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5237     GenerateVarHandleTarget(invoke, target, codegen);
5238     if (slow_path != nullptr) {
5239       slow_path->SetGetAndUpdateOp(get_and_update_op);
5240       __ Bind(slow_path->GetNativeByteOrderLabel());
5241     }
5242   }
5243 
5244   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
5245   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
5246   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
5247   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
5248 
5249   if (release_barrier) {
5250     codegen->GenerateMemoryBarrier(
5251         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
5252   }
5253 
5254   // Use the scratch register for the pointer to the target location.
5255   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5256   vixl32::Register tmp_ptr = temps.Acquire();
5257   __ Add(tmp_ptr, target.object, target.offset);
5258 
5259   // Use the offset temporary for the exclusive store result.
5260   vixl32::Register store_result = target.offset;
5261 
5262   // The load/store type is never floating point.
5263   DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
5264       ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5265       : value_type;
5266 
5267   // Prepare register for old value and temporaries if any.
5268   Location old_value = out;
5269   Location maybe_temp = Location::NoLocation();
5270   Location maybe_vreg_temp = Location::NoLocation();
5271   if (get_and_update_op == GetAndUpdateOp::kSet) {
5272     // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5273     // rather than moving between core and FP registers in the loop.
5274     if (value_type == DataType::Type::kFloat64) {
5275       vixl32::DRegister arg_vreg = DRegisterFrom(arg);
5276       DCHECK_EQ(locations->GetTempCount(), 5u);  // `store_result` and the four here.
5277       old_value =
5278           LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
5279       arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
5280       if (byte_swap) {
5281         __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
5282         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5283       } else {
5284         __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
5285       }
5286     } else if (value_type == DataType::Type::kFloat32) {
5287       vixl32::SRegister arg_vreg = SRegisterFrom(arg);
5288       DCHECK_EQ(locations->GetTempCount(), 3u);  // `store_result` and the two here.
5289       old_value = locations->GetTemp(1);
5290       arg = locations->GetTemp(2);
5291       __ Vmov(RegisterFrom(arg), arg_vreg);
5292       if (byte_swap) {
5293         GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
5294       }
5295     } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5296       if (kUseBakerReadBarrier) {
5297         // Load the old value initially to a temporary register.
5298         // We shall move it to `out` later with a read barrier.
5299         old_value = LocationFrom(store_result);
5300         store_result = RegisterFrom(out);  // Use the `out` for the exclusive store result.
5301       } else {
5302         // The store_result is a separate temporary.
5303         DCHECK(!store_result.Is(target.object));
5304         DCHECK(!store_result.Is(target.offset));
5305       }
5306     } else if (byte_swap) {
5307       Location original_arg = arg;
5308       arg = locations->GetTemp(1);
5309       if (value_type == DataType::Type::kInt64) {
5310         arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
5311         // Swap the high/low regs and reverse the bytes in each after the load.
5312         old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5313       }
5314       GenerateReverseBytes(assembler, value_type, original_arg, arg);
5315     }
5316   } else {
5317     maybe_temp = DataType::Is64BitType(value_type)
5318         ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
5319         : locations->GetTemp(1);
5320     DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
5321     if (DataType::IsFloatingPointType(value_type)) {
5322       maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
5323       DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
5324     }
5325     if (byte_swap) {
5326       if (get_and_update_op == GetAndUpdateOp::kAdd) {
5327         // We need to do the byte swapping in the CAS loop for GetAndAdd.
5328         get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5329       } else if (value_type == DataType::Type::kInt64) {
5330         // Swap the high/low regs and reverse the bytes in each after the load.
5331         old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5332         // Due to lack of registers, reverse bytes in `arg` and undo that later.
5333         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5334         arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
5335       } else {
5336         DCHECK(!DataType::IsFloatingPointType(value_type));
5337         Location original_arg = arg;
5338         arg = locations->GetTemp(2);
5339         DCHECK(!arg.Contains(LocationFrom(store_result)));
5340         GenerateReverseBytes(assembler, value_type, original_arg, arg);
5341       }
5342     }
5343   }
5344 
5345   GenerateGetAndUpdate(codegen,
5346                        get_and_update_op,
5347                        load_store_type,
5348                        tmp_ptr,
5349                        arg,
5350                        old_value,
5351                        store_result,
5352                        maybe_temp,
5353                        maybe_vreg_temp);
5354 
5355   if (acquire_barrier) {
5356     codegen->GenerateMemoryBarrier(
5357         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5358   }
5359 
5360   if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
5361     if (value_type == DataType::Type::kInt64) {
5362       GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5363       if (get_and_update_op != GetAndUpdateOp::kSet) {
5364         // Undo the byte swapping in `arg`. We do not know whether the
5365         // values in these registers will be needed later.
5366         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5367       }
5368     } else {
5369       GenerateReverseBytes(assembler, value_type, old_value, out);
5370     }
5371   } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5372              DataType::IsFloatingPointType(value_type)) {
5373     if (value_type == DataType::Type::kFloat64) {
5374       __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5375     } else {
5376       __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5377     }
5378   } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
5379     if (kUseBakerReadBarrier) {
5380       codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out),
5381                                                          RegisterFrom(old_value));
5382     } else {
5383       codegen->GenerateReadBarrierSlow(
5384           invoke,
5385           Location::RegisterLocation(RegisterFrom(out).GetCode()),
5386           Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
5387           Location::RegisterLocation(target.object.GetCode()),
5388           /*offset=*/ 0u,
5389           /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5390     }
5391   }
5392 
5393   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5394     // Reuse the offset temporary and scratch register for MarkGCCard.
5395     vixl32::Register temp = target.offset;
5396     vixl32::Register card = tmp_ptr;
5397     // Mark card for object assuming new value is stored.
5398     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5399     codegen->MaybeMarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
5400   }
5401 
5402   if (slow_path != nullptr) {
5403     DCHECK(!byte_swap);
5404     __ Bind(slow_path->GetExitLabel());
5405   }
5406 }
5407 
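// The accessors below all funnel into GenerateVarHandleGetAndUpdate() with the matching
// GetAndUpdateOp: kSet for getAndSet*, kAdd for getAndAdd* and kAnd/kOr/kXor for the
// getAndBitwise* families, each in sequentially consistent, acquire and release flavours.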
5408 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5409   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5410 }
5411 
5412 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5413   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5414 }
5415 
5416 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5417   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5418 }
5419 
5420 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5421   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5422 }
5423 
5424 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5425   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
5426 }
5427 
5428 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5429   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5430 }
5431 
5432 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5433   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5434 }
5435 
5436 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5437   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5438 }
5439 
5440 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5441   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5442 }
5443 
5444 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5445   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5446 }
5447 
5448 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5449   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
5450 }
5451 
5452 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5453   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5454 }
5455 
5456 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5457   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5458 }
5459 
5460 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5461   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5462 }
5463 
5464 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5465   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5466 }
5467 
5468 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5469   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5470 }
5471 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5472 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5473   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
5474 }
5475 
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)5476 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5477   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5478 }
5479 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5480 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5481   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5482 }
5483 
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)5484 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5485   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5486 }
5487 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5488 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5489   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5490 }
5491 
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5492 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5493   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5494 }
5495 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5496 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5497   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
5498 }
5499 
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5500 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5501   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5502 }
5503 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5504 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5505   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5506 }
5507 
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5508 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5509   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5510 }
5511 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5512 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5513   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5514 }
5515 
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5516 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5517   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5518 }
5519 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5520 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5521   CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
5522 }
5523 
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5524 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5525   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5526 }
5527 
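// Out-of-line code for byte-array-view VarHandles (as created by
// MethodHandles.byteArrayViewVarHandle()). The main path branches here when the coordinate
// is an array whose component type does not match the value type. Re-check that the
// VarHandle really is a ByteArrayViewVarHandle, validate bounds and alignment, then either
// rejoin the main path (native byte order) or emit a byte-swapping access.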
void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
  CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
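  // Byte order is irrelevant for single-byte accesses, so only types wider than one byte
  // reach this code (hence the DCHECK above).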
  vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
  vixl32::Register varhandle = InputRegisterAt(invoke, 0);
  vixl32::Register object = InputRegisterAt(invoke, 1);
  vixl32::Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    // Use the offset temporary register. It is not used yet at this point.
    vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = temps.Acquire();

    // The main path checked that the coordinateType0 is an array class that matches
    // the class of the actual coordinate argument, but the array's component type does
    // not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(ne, GetEntryLabel());

    // Check for array index out of bounds.
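    // The check below computes `length - index` with SUBS (carry clear if `index > length`)
    // and, when in range, compares the remaining bytes against `size` under an IT block;
    // the LO branch then goes to the runtime for any access that would run past the end.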
    __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
    if (!temp.IsLow()) {
      // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
      __ Mov(temp2, size_operand);
    }
    __ Subs(temp, temp, index);
    {
      // Use ExactAssemblyScope here because we are using IT.
      ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                  2 * k16BitT32InstructionSizeInBytes);
      __ it(hs);
      if (temp.IsLow()) {
        __ cmp(hs, temp, size_operand);
      } else {
        __ cmp(hs, temp, temp2);
      }
    }
    __ B(lo, GetEntryLabel());

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
    __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
    __ B(ne, GetEntryLabel());

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
      HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
      if (IsZeroBitPattern(arg)) {
        // There is no reason to differentiate between native byte order and byte-swap
        // for setting a zero bit pattern. Just return to the main path.
        __ B(GetNativeByteOrderLabel());
        return;
      }
    }
    __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cmp(temp2, 0);
    __ B(ne, GetNativeByteOrderLabel());
  }

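  // Reaching this point means the view does not use native byte order; generate the
  // corresponding access with a byte swap.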
  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

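// Intrinsics in the ARM unimplemented list get empty visitor definitions, so the
// corresponding invokes simply fall back to the regular call path.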
#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art