1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_arm_vixl.h"
18 
19 #include "arch/arm/callee_save_frame_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "art_method.h"
22 #include "code_generator_arm_vixl.h"
23 #include "common_arm.h"
24 #include "heap_poisoning.h"
25 #include "intrinsics.h"
26 #include "intrinsics_utils.h"
27 #include "lock_word.h"
28 #include "mirror/array-inl.h"
29 #include "mirror/object_array-inl.h"
30 #include "mirror/reference.h"
31 #include "mirror/string-inl.h"
32 #include "scoped_thread_state_change-inl.h"
33 #include "thread-current-inl.h"
34 
35 #include "aarch32/constants-aarch32.h"
36 
37 namespace art {
38 namespace arm {
39 
40 #define __ assembler->GetVIXLAssembler()->
41 
42 using helpers::DRegisterFrom;
43 using helpers::HighRegisterFrom;
44 using helpers::InputDRegisterAt;
45 using helpers::InputRegisterAt;
46 using helpers::InputSRegisterAt;
47 using helpers::Int32ConstantFrom;
48 using helpers::LocationFrom;
49 using helpers::LowRegisterFrom;
50 using helpers::LowSRegisterFrom;
51 using helpers::HighSRegisterFrom;
52 using helpers::OutputDRegister;
53 using helpers::OutputRegister;
54 using helpers::RegisterFrom;
55 using helpers::SRegisterFrom;
56 
57 using namespace vixl::aarch32;  // NOLINT(build/namespaces)
58 
59 using vixl::ExactAssemblyScope;
60 using vixl::CodeBufferCheckScope;
61 
GetAssembler()62 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
63   return codegen_->GetAssembler();
64 }
65 
GetAllocator()66 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
67   return codegen_->GetGraph()->GetAllocator();
68 }
69 
70 using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
71                                                    SlowPathCodeARMVIXL,
72                                                    ArmVIXLAssembler>;
73 
74 // Compute base address for the System.arraycopy intrinsic in `base`.
GenSystemArrayCopyBaseAddress(ArmVIXLAssembler * assembler,DataType::Type type,const vixl32::Register & array,const Location & pos,const vixl32::Register & base)75 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
76                                           DataType::Type type,
77                                           const vixl32::Register& array,
78                                           const Location& pos,
79                                           const vixl32::Register& base) {
80   // This routine is only used by the SystemArrayCopy intrinsic at the
81   // moment. We can allow DataType::Type::kReference as `type` to implement
82   // the SystemArrayCopyChar intrinsic.
83   DCHECK_EQ(type, DataType::Type::kReference);
84   const int32_t element_size = DataType::Size(type);
85   const uint32_t element_size_shift = DataType::SizeShift(type);
86   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
87 
88   if (pos.IsConstant()) {
89     int32_t constant = Int32ConstantFrom(pos);
90     __ Add(base, array, element_size * constant + data_offset);
91   } else {
92     __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
93     __ Add(base, base, data_offset);
94   }
95 }
96 
97 // Compute end address for the System.arraycopy intrinsic in `end`.
GenSystemArrayCopyEndAddress(ArmVIXLAssembler * assembler,DataType::Type type,const Location & copy_length,const vixl32::Register & base,const vixl32::Register & end)98 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
99                                          DataType::Type type,
100                                          const Location& copy_length,
101                                          const vixl32::Register& base,
102                                          const vixl32::Register& end) {
103   // This routine is only used by the SystemArrayCopy intrinsic at the
104   // moment. We can allow DataType::Type::kReference as `type` to implement
105   // the SystemArrayCopyChar intrinsic.
106   DCHECK_EQ(type, DataType::Type::kReference);
107   const int32_t element_size = DataType::Size(type);
108   const uint32_t element_size_shift = DataType::SizeShift(type);
109 
110   if (copy_length.IsConstant()) {
111     int32_t constant = Int32ConstantFrom(copy_length);
112     __ Add(end, base, element_size * constant);
113   } else {
114     __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
115   }
116 }
117 
118 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
119 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
120  public:
ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction * instruction)121   explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
122       : SlowPathCodeARMVIXL(instruction) {
123     DCHECK(kEmitCompilerReadBarrier);
124     DCHECK(kUseBakerReadBarrier);
125   }
126 
EmitNativeCode(CodeGenerator * codegen)127   void EmitNativeCode(CodeGenerator* codegen) override {
128     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
129     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
130     LocationSummary* locations = instruction_->GetLocations();
131     DCHECK(locations->CanCall());
132     DCHECK(instruction_->IsInvokeStaticOrDirect())
133         << "Unexpected instruction in read barrier arraycopy slow path: "
134         << instruction_->DebugName();
135     DCHECK(instruction_->GetLocations()->Intrinsified());
136     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
137 
138     DataType::Type type = DataType::Type::kReference;
139     const int32_t element_size = DataType::Size(type);
140 
141     vixl32::Register dest = InputRegisterAt(instruction_, 2);
142     Location dest_pos = locations->InAt(3);
143     vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
144     vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
145     vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
146     vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
147 
148     __ Bind(GetEntryLabel());
149     // Compute the base destination address in `dst_curr_addr`.
150     GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
151 
152     vixl32::Label loop;
153     __ Bind(&loop);
154     __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
155     assembler->MaybeUnpoisonHeapReference(tmp);
156     // TODO: Inline the mark bit check before calling the runtime?
157     // tmp = ReadBarrier::Mark(tmp);
158     // No need to save live registers; it's taken care of by the
159     // entrypoint. Also, there is no need to update the stack mask,
160     // as this runtime call will not trigger a garbage collection.
161     // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
162     // explanations.)
163     DCHECK(!tmp.IsSP());
164     DCHECK(!tmp.IsLR());
165     DCHECK(!tmp.IsPC());
166     // IP is used internally by the ReadBarrierMarkRegX entry point
167     // as a temporary (and not preserved).  It thus cannot be used by
168     // any live register in this slow path.
169     DCHECK(!src_curr_addr.Is(ip));
170     DCHECK(!dst_curr_addr.Is(ip));
171     DCHECK(!src_stop_addr.Is(ip));
172     DCHECK(!tmp.Is(ip));
173     DCHECK(tmp.IsRegister()) << tmp;
174     // TODO: Load the entrypoint once before the loop, instead of
175     // loading it at every iteration.
176     int32_t entry_point_offset =
177         Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
178     // This runtime call does not require a stack map.
179     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
180     assembler->MaybePoisonHeapReference(tmp);
181     __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
182     __ Cmp(src_curr_addr, src_stop_addr);
183     __ B(ne, &loop, /* is_far_target= */ false);
184     __ B(GetExitLabel());
185   }
186 
GetDescription() const187   const char* GetDescription() const override {
188     return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
189   }
190 
191  private:
192   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
193 };
194 
IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL * codegen)195 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
196     : allocator_(codegen->GetGraph()->GetAllocator()),
197       codegen_(codegen),
198       assembler_(codegen->GetAssembler()),
199       features_(codegen->GetInstructionSetFeatures()) {}
200 
TryDispatch(HInvoke * invoke)201 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
202   Dispatch(invoke);
203   LocationSummary* res = invoke->GetLocations();
204   if (res == nullptr) {
205     return false;
206   }
207   return res->Intrinsified();
208 }
209 
CreateFPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)210 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
211   LocationSummary* locations =
212       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
213   locations->SetInAt(0, Location::RequiresFpuRegister());
214   locations->SetOut(Location::RequiresRegister());
215 }
216 
CreateIntToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)217 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
218   LocationSummary* locations =
219       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
220   locations->SetInAt(0, Location::RequiresRegister());
221   locations->SetOut(Location::RequiresFpuRegister());
222 }
223 
MoveFPToInt(LocationSummary * locations,bool is64bit,ArmVIXLAssembler * assembler)224 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
225   Location input = locations->InAt(0);
226   Location output = locations->Out();
227   if (is64bit) {
228     __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
229   } else {
230     __ Vmov(RegisterFrom(output), SRegisterFrom(input));
231   }
232 }
233 
MoveIntToFP(LocationSummary * locations,bool is64bit,ArmVIXLAssembler * assembler)234 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
235   Location input = locations->InAt(0);
236   Location output = locations->Out();
237   if (is64bit) {
238     __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
239   } else {
240     __ Vmov(SRegisterFrom(output), RegisterFrom(input));
241   }
242 }
243 
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)244 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
245   CreateFPToIntLocations(allocator_, invoke);
246 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)247 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
248   CreateIntToFPLocations(allocator_, invoke);
249 }
250 
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)251 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
252   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
253 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)254 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
255   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
256 }
257 
VisitFloatFloatToRawIntBits(HInvoke * invoke)258 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
259   CreateFPToIntLocations(allocator_, invoke);
260 }
VisitFloatIntBitsToFloat(HInvoke * invoke)261 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
262   CreateIntToFPLocations(allocator_, invoke);
263 }
264 
VisitFloatFloatToRawIntBits(HInvoke * invoke)265 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
266   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
267 }
VisitFloatIntBitsToFloat(HInvoke * invoke)268 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
269   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
270 }
271 
CreateIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)272 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
273   LocationSummary* locations =
274       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
275   locations->SetInAt(0, Location::RequiresRegister());
276   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
277 }
278 
CreateIntIntToIntSlowPathCallLocations(ArenaAllocator * allocator,HInvoke * invoke)279 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
280   LocationSummary* locations =
281       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
282   locations->SetInAt(0, Location::RequiresRegister());
283   locations->SetInAt(1, Location::RequiresRegister());
284   // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
285   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
286 }
287 
CreateLongToLongLocationsWithOverlap(ArenaAllocator * allocator,HInvoke * invoke)288 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
289   LocationSummary* locations =
290       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
291   locations->SetInAt(0, Location::RequiresRegister());
292   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
293 }
294 
CreateFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)295 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296   LocationSummary* locations =
297       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
298   locations->SetInAt(0, Location::RequiresFpuRegister());
299   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
300 }
301 
GenNumberOfLeadingZeros(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)302 static void GenNumberOfLeadingZeros(HInvoke* invoke,
303                                     DataType::Type type,
304                                     CodeGeneratorARMVIXL* codegen) {
305   ArmVIXLAssembler* assembler = codegen->GetAssembler();
306   LocationSummary* locations = invoke->GetLocations();
307   Location in = locations->InAt(0);
308   vixl32::Register out = RegisterFrom(locations->Out());
309 
310   DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
311 
312   if (type == DataType::Type::kInt64) {
313     vixl32::Register in_reg_lo = LowRegisterFrom(in);
314     vixl32::Register in_reg_hi = HighRegisterFrom(in);
315     vixl32::Label end;
316     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
317     __ Clz(out, in_reg_hi);
318     __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
319     __ Clz(out, in_reg_lo);
320     __ Add(out, out, 32);
321     if (end.IsReferenced()) {
322       __ Bind(&end);
323     }
324   } else {
325     __ Clz(out, RegisterFrom(in));
326   }
327 }
328 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)329 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
330   CreateIntToIntLocations(allocator_, invoke);
331 }
332 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)333 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
334   GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
335 }
336 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)337 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
338   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
339 }
340 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)341 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
342   GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
343 }
344 
GenNumberOfTrailingZeros(HInvoke * invoke,DataType::Type type,CodeGeneratorARMVIXL * codegen)345 static void GenNumberOfTrailingZeros(HInvoke* invoke,
346                                      DataType::Type type,
347                                      CodeGeneratorARMVIXL* codegen) {
348   DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
349 
350   ArmVIXLAssembler* assembler = codegen->GetAssembler();
351   LocationSummary* locations = invoke->GetLocations();
352   vixl32::Register out = RegisterFrom(locations->Out());
353 
354   if (type == DataType::Type::kInt64) {
355     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
356     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
357     vixl32::Label end;
358     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
359     __ Rbit(out, in_reg_lo);
360     __ Clz(out, out);
361     __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
362     __ Rbit(out, in_reg_hi);
363     __ Clz(out, out);
364     __ Add(out, out, 32);
365     if (end.IsReferenced()) {
366       __ Bind(&end);
367     }
368   } else {
369     vixl32::Register in = RegisterFrom(locations->InAt(0));
370     __ Rbit(out, in);
371     __ Clz(out, out);
372   }
373 }
374 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)375 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
376   CreateIntToIntLocations(allocator_, invoke);
377 }
378 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)379 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
380   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
381 }
382 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)383 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
384   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
385 }
386 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)387 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
388   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
389 }
390 
VisitMathSqrt(HInvoke * invoke)391 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
392   CreateFPToFPLocations(allocator_, invoke);
393 }
394 
VisitMathSqrt(HInvoke * invoke)395 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
396   ArmVIXLAssembler* assembler = GetAssembler();
397   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
398 }
399 
VisitMathRint(HInvoke * invoke)400 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
401   if (features_.HasARMv8AInstructions()) {
402     CreateFPToFPLocations(allocator_, invoke);
403   }
404 }
405 
VisitMathRint(HInvoke * invoke)406 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
407   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
408   ArmVIXLAssembler* assembler = GetAssembler();
409   __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
410 }
411 
VisitMathRoundFloat(HInvoke * invoke)412 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
413   if (features_.HasARMv8AInstructions()) {
414     LocationSummary* locations =
415         new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
416     locations->SetInAt(0, Location::RequiresFpuRegister());
417     locations->SetOut(Location::RequiresRegister());
418     locations->AddTemp(Location::RequiresFpuRegister());
419   }
420 }
421 
VisitMathRoundFloat(HInvoke * invoke)422 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
423   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
424 
425   ArmVIXLAssembler* assembler = GetAssembler();
426   vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
427   vixl32::Register out_reg = OutputRegister(invoke);
428   vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
429   vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
430   vixl32::Label done;
431   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
432 
433   // Round to nearest integer, ties away from zero.
434   __ Vcvta(S32, F32, temp1, in_reg);
435   __ Vmov(out_reg, temp1);
436 
437   // For positive, zero or NaN inputs, rounding is done.
438   __ Cmp(out_reg, 0);
439   __ B(ge, final_label, /* is_far_target= */ false);
440 
441   // Handle input < 0 cases.
442   // If input is negative but not a tie, previous result (round to nearest) is valid.
443   // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
444   __ Vrinta(F32, temp1, in_reg);
445   __ Vmov(temp2, 0.5);
446   __ Vsub(F32, temp1, in_reg, temp1);
447   __ Vcmp(F32, temp1, temp2);
448   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
449   {
450     // Use ExactAssemblyScope here because we are using IT.
451     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
452                                 2 * kMaxInstructionSizeInBytes,
453                                 CodeBufferCheckScope::kMaximumSize);
454     __ it(eq);
455     __ add(eq, out_reg, out_reg, 1);
456   }
457 
458   if (done.IsReferenced()) {
459     __ Bind(&done);
460   }
461 }
462 
VisitMemoryPeekByte(HInvoke * invoke)463 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
464   CreateIntToIntLocations(allocator_, invoke);
465 }
466 
VisitMemoryPeekByte(HInvoke * invoke)467 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
468   ArmVIXLAssembler* assembler = GetAssembler();
469   // Ignore upper 4B of long address.
470   __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
471 }
472 
VisitMemoryPeekIntNative(HInvoke * invoke)473 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
474   CreateIntToIntLocations(allocator_, invoke);
475 }
476 
VisitMemoryPeekIntNative(HInvoke * invoke)477 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
478   ArmVIXLAssembler* assembler = GetAssembler();
479   // Ignore upper 4B of long address.
480   __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
481 }
482 
VisitMemoryPeekLongNative(HInvoke * invoke)483 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
484   CreateIntToIntLocations(allocator_, invoke);
485 }
486 
VisitMemoryPeekLongNative(HInvoke * invoke)487 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
488   ArmVIXLAssembler* assembler = GetAssembler();
489   // Ignore upper 4B of long address.
490   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
491   // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
492   // exception. So we can't use ldrd as addr may be unaligned.
493   vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
494   vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
495   if (addr.Is(lo)) {
496     __ Ldr(hi, MemOperand(addr, 4));
497     __ Ldr(lo, MemOperand(addr));
498   } else {
499     __ Ldr(lo, MemOperand(addr));
500     __ Ldr(hi, MemOperand(addr, 4));
501   }
502 }
503 
VisitMemoryPeekShortNative(HInvoke * invoke)504 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
505   CreateIntToIntLocations(allocator_, invoke);
506 }
507 
VisitMemoryPeekShortNative(HInvoke * invoke)508 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
509   ArmVIXLAssembler* assembler = GetAssembler();
510   // Ignore upper 4B of long address.
511   __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
512 }
513 
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)514 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
515   LocationSummary* locations =
516       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
517   locations->SetInAt(0, Location::RequiresRegister());
518   locations->SetInAt(1, Location::RequiresRegister());
519 }
520 
VisitMemoryPokeByte(HInvoke * invoke)521 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
522   CreateIntIntToVoidLocations(allocator_, invoke);
523 }
524 
VisitMemoryPokeByte(HInvoke * invoke)525 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
526   ArmVIXLAssembler* assembler = GetAssembler();
527   __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
528 }
529 
VisitMemoryPokeIntNative(HInvoke * invoke)530 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
531   CreateIntIntToVoidLocations(allocator_, invoke);
532 }
533 
VisitMemoryPokeIntNative(HInvoke * invoke)534 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
535   ArmVIXLAssembler* assembler = GetAssembler();
536   __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
537 }
538 
VisitMemoryPokeLongNative(HInvoke * invoke)539 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
540   CreateIntIntToVoidLocations(allocator_, invoke);
541 }
542 
VisitMemoryPokeLongNative(HInvoke * invoke)543 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
544   ArmVIXLAssembler* assembler = GetAssembler();
545   // Ignore upper 4B of long address.
546   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
547   // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
548   // exception. So we can't use ldrd as addr may be unaligned.
549   __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
550   __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
551 }
552 
VisitMemoryPokeShortNative(HInvoke * invoke)553 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
554   CreateIntIntToVoidLocations(allocator_, invoke);
555 }
556 
VisitMemoryPokeShortNative(HInvoke * invoke)557 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
558   ArmVIXLAssembler* assembler = GetAssembler();
559   __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
560 }
561 
VisitThreadCurrentThread(HInvoke * invoke)562 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
563   LocationSummary* locations =
564       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
565   locations->SetOut(Location::RequiresRegister());
566 }
567 
VisitThreadCurrentThread(HInvoke * invoke)568 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
569   ArmVIXLAssembler* assembler = GetAssembler();
570   __ Ldr(OutputRegister(invoke),
571          MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
572 }
573 
VisitStringCompareTo(HInvoke * invoke)574 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
575   // The inputs plus one temp.
576   LocationSummary* locations =
577       new (allocator_) LocationSummary(invoke,
578                                        invoke->InputAt(1)->CanBeNull()
579                                            ? LocationSummary::kCallOnSlowPath
580                                            : LocationSummary::kNoCall,
581                                        kIntrinsified);
582   locations->SetInAt(0, Location::RequiresRegister());
583   locations->SetInAt(1, Location::RequiresRegister());
584   locations->AddTemp(Location::RequiresRegister());
585   locations->AddTemp(Location::RequiresRegister());
586   locations->AddTemp(Location::RequiresRegister());
587   // Need temporary registers for String compression's feature.
588   if (mirror::kUseStringCompression) {
589     locations->AddTemp(Location::RequiresRegister());
590   }
591   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
592 }
593 
594 // Forward declaration.
595 //
596 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
597 // by the compiler for every C++ function, and if this function gets inlined in
598 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
599 // build failure. That is the reason why NO_INLINE attribute is used.
600 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
601                                                   HInvoke* invoke,
602                                                   vixl32::Label* end,
603                                                   vixl32::Label* different_compression);
604 
VisitStringCompareTo(HInvoke * invoke)605 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
606   ArmVIXLAssembler* assembler = GetAssembler();
607   LocationSummary* locations = invoke->GetLocations();
608 
609   const vixl32::Register str = InputRegisterAt(invoke, 0);
610   const vixl32::Register arg = InputRegisterAt(invoke, 1);
611   const vixl32::Register out = OutputRegister(invoke);
612 
613   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
614   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
615   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
616   vixl32::Register temp3;
617   if (mirror::kUseStringCompression) {
618     temp3 = RegisterFrom(locations->GetTemp(3));
619   }
620 
621   vixl32::Label end;
622   vixl32::Label different_compression;
623 
624   // Get offsets of count and value fields within a string object.
625   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
626 
627   // Note that the null check must have been done earlier.
628   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
629 
630   // Take slow path and throw if input can be and is null.
631   SlowPathCodeARMVIXL* slow_path = nullptr;
632   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
633   if (can_slow_path) {
634     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
635     codegen_->AddSlowPath(slow_path);
636     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
637   }
638 
639   // Reference equality check, return 0 if same reference.
640   __ Subs(out, str, arg);
641   __ B(eq, &end);
642 
643   if (mirror::kUseStringCompression) {
644     // Load `count` fields of this and argument strings.
645     __ Ldr(temp3, MemOperand(str, count_offset));
646     __ Ldr(temp2, MemOperand(arg, count_offset));
647     // Extract lengths from the `count` fields.
648     __ Lsr(temp0, temp3, 1u);
649     __ Lsr(temp1, temp2, 1u);
650   } else {
651     // Load lengths of this and argument strings.
652     __ Ldr(temp0, MemOperand(str, count_offset));
653     __ Ldr(temp1, MemOperand(arg, count_offset));
654   }
655   // out = length diff.
656   __ Subs(out, temp0, temp1);
657   // temp0 = min(len(str), len(arg)).
658 
659   {
660     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
661                            2 * kMaxInstructionSizeInBytes,
662                            CodeBufferCheckScope::kMaximumSize);
663 
664     __ it(gt);
665     __ mov(gt, temp0, temp1);
666   }
667 
668   // Shorter string is empty?
669   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
670   // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
671   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
672 
673   if (mirror::kUseStringCompression) {
674     // Check if both strings using same compression style to use this comparison loop.
675     __ Eors(temp2, temp2, temp3);
676     __ Lsrs(temp2, temp2, 1u);
677     __ B(cs, &different_compression);
678     // For string compression, calculate the number of bytes to compare (not chars).
679     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
680     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
681 
682     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
683                            2 * kMaxInstructionSizeInBytes,
684                            CodeBufferCheckScope::kMaximumSize);
685 
686     __ it(ne);
687     __ add(ne, temp0, temp0, temp0);
688   }
689 
690 
691   GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
692 
693   __ Bind(&end);
694 
695   if (can_slow_path) {
696     __ Bind(slow_path->GetExitLabel());
697   }
698 }
699 
GenerateStringCompareToLoop(ArmVIXLAssembler * assembler,HInvoke * invoke,vixl32::Label * end,vixl32::Label * different_compression)700 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
701                                         HInvoke* invoke,
702                                         vixl32::Label* end,
703                                         vixl32::Label* different_compression) {
704   LocationSummary* locations = invoke->GetLocations();
705 
706   const vixl32::Register str = InputRegisterAt(invoke, 0);
707   const vixl32::Register arg = InputRegisterAt(invoke, 1);
708   const vixl32::Register out = OutputRegister(invoke);
709 
710   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
711   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
712   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
713   vixl32::Register temp3;
714   if (mirror::kUseStringCompression) {
715     temp3 = RegisterFrom(locations->GetTemp(3));
716   }
717 
718   vixl32::Label loop;
719   vixl32::Label find_char_diff;
720 
721   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
722   // Store offset of string value in preparation for comparison loop.
723   __ Mov(temp1, value_offset);
724 
725   // Assertions that must hold in order to compare multiple characters at a time.
726   CHECK_ALIGNED(value_offset, 8);
727   static_assert(IsAligned<8>(kObjectAlignment),
728                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
729 
730   const unsigned char_size = DataType::Size(DataType::Type::kUint16);
731   DCHECK_EQ(char_size, 2u);
732 
733   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
734 
735   vixl32::Label find_char_diff_2nd_cmp;
736   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
737   __ Bind(&loop);
738   vixl32::Register temp_reg = temps.Acquire();
739   __ Ldr(temp_reg, MemOperand(str, temp1));
740   __ Ldr(temp2, MemOperand(arg, temp1));
741   __ Cmp(temp_reg, temp2);
742   __ B(ne, &find_char_diff, /* is_far_target= */ false);
743   __ Add(temp1, temp1, char_size * 2);
744 
745   __ Ldr(temp_reg, MemOperand(str, temp1));
746   __ Ldr(temp2, MemOperand(arg, temp1));
747   __ Cmp(temp_reg, temp2);
748   __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
749   __ Add(temp1, temp1, char_size * 2);
750   // With string compression, we have compared 8 bytes, otherwise 4 chars.
751   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
752   __ B(hi, &loop, /* is_far_target= */ false);
753   __ B(end);
754 
755   __ Bind(&find_char_diff_2nd_cmp);
756   if (mirror::kUseStringCompression) {
757     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
758     __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
759   } else {
760     // Without string compression, we can start treating temp0 as signed
761     // and rely on the signed comparison below.
762     __ Sub(temp0, temp0, 2);
763   }
764 
765   // Find the single character difference.
766   __ Bind(&find_char_diff);
767   // Get the bit position of the first character that differs.
768   __ Eor(temp1, temp2, temp_reg);
769   __ Rbit(temp1, temp1);
770   __ Clz(temp1, temp1);
771 
772   // temp0 = number of characters remaining to compare.
773   // (Without string compression, it could be < 1 if a difference is found by the second CMP
774   // in the comparison loop, and after the end of the shorter string data).
775 
776   // Without string compression (temp1 >> 4) = character where difference occurs between the last
777   // two words compared, in the interval [0,1].
778   // (0 for low half-word different, 1 for high half-word different).
779   // With string compression, (temp1 << 3) = byte where the difference occurs,
780   // in the interval [0,3].
781 
782   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
783   // the remaining string data, so just return length diff (out).
784   // The comparison is unsigned for string compression, otherwise signed.
785   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
786   __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
787 
788   // Extract the characters and calculate the difference.
789   if (mirror::kUseStringCompression) {
790     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
791     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
792     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
793     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
794     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
795     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
796     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
797     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
798     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
799     __ And(temp2, temp2, temp3);
800     __ And(out, out, temp3);
801   } else {
802     __ Bic(temp1, temp1, 0xf);
803     __ Lsr(temp2, temp2, temp1);
804     __ Lsr(out, temp_reg, temp1);
805     __ Movt(temp2, 0);
806     __ Movt(out, 0);
807   }
808 
809   __ Sub(out, out, temp2);
810   temps.Release(temp_reg);
811 
812   if (mirror::kUseStringCompression) {
813     __ B(end);
814     __ Bind(different_compression);
815 
816     // Comparison for different compression style.
817     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
818     DCHECK_EQ(c_char_size, 1u);
819 
820     // We want to free up the temp3, currently holding `str.count`, for comparison.
821     // So, we move it to the bottom bit of the iteration count `temp0` which we tnen
822     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
823     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
824     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
825     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
826     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
827     __ Mov(temp1, str);
828     __ Mov(temp2, arg);
829     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
830     {
831       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
832                              3 * kMaxInstructionSizeInBytes,
833                              CodeBufferCheckScope::kMaximumSize);
834       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
835       __ mov(cs, temp1, arg);                 // Preserves flags.
836       __ mov(cs, temp2, str);                 // Preserves flags.
837     }
838     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
839 
840     // Adjust temp1 and temp2 from string pointers to data pointers.
841     __ Add(temp1, temp1, value_offset);
842     __ Add(temp2, temp2, value_offset);
843 
844     vixl32::Label different_compression_loop;
845     vixl32::Label different_compression_diff;
846 
847     // Main loop for different compression.
848     temp_reg = temps.Acquire();
849     __ Bind(&different_compression_loop);
850     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
851     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
852     __ Cmp(temp_reg, temp3);
853     __ B(ne, &different_compression_diff, /* is_far_target= */ false);
854     __ Subs(temp0, temp0, 2);
855     __ B(hi, &different_compression_loop, /* is_far_target= */ false);
856     __ B(end);
857 
858     // Calculate the difference.
859     __ Bind(&different_compression_diff);
860     __ Sub(out, temp_reg, temp3);
861     temps.Release(temp_reg);
862     // Flip the difference if the `arg` is compressed.
863     // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
864     __ Lsrs(temp0, temp0, 1u);
865     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
866                   "Expecting 0=compressed, 1=uncompressed");
867 
868     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
869                            2 * kMaxInstructionSizeInBytes,
870                            CodeBufferCheckScope::kMaximumSize);
871     __ it(cc);
872     __ rsb(cc, out, out, 0);
873   }
874 }
875 
876 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
877 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
878 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
879 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
880 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
881 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
882 
GetConstString(HInstruction * candidate,uint32_t * utf16_length)883 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
884   if (candidate->IsLoadString()) {
885     HLoadString* load_string = candidate->AsLoadString();
886     const DexFile& dex_file = load_string->GetDexFile();
887     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
888   }
889   return nullptr;
890 }
891 
VisitStringEquals(HInvoke * invoke)892 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
893   LocationSummary* locations =
894       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
895   InvokeRuntimeCallingConventionARMVIXL calling_convention;
896   locations->SetInAt(0, Location::RequiresRegister());
897   locations->SetInAt(1, Location::RequiresRegister());
898 
899   // Temporary registers to store lengths of strings and for calculations.
900   // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
901   locations->AddTemp(LocationFrom(r0));
902 
903   // For the generic implementation and for long const strings we need an extra temporary.
904   // We do not need it for short const strings, up to 4 bytes, see code generation below.
905   uint32_t const_string_length = 0u;
906   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
907   if (const_string == nullptr) {
908     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
909   }
910   bool is_compressed =
911       mirror::kUseStringCompression &&
912       const_string != nullptr &&
913       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
914   if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
915     locations->AddTemp(Location::RequiresRegister());
916   }
917 
918   // TODO: If the String.equals() is used only for an immediately following HIf, we can
919   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
920   // Then we shall need an extra temporary register instead of the output register.
921   locations->SetOut(Location::RequiresRegister());
922 }
923 
VisitStringEquals(HInvoke * invoke)924 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
925   ArmVIXLAssembler* assembler = GetAssembler();
926   LocationSummary* locations = invoke->GetLocations();
927 
928   vixl32::Register str = InputRegisterAt(invoke, 0);
929   vixl32::Register arg = InputRegisterAt(invoke, 1);
930   vixl32::Register out = OutputRegister(invoke);
931 
932   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
933 
934   vixl32::Label loop;
935   vixl32::Label end;
936   vixl32::Label return_true;
937   vixl32::Label return_false;
938   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
939 
940   // Get offsets of count, value, and class fields within a string object.
941   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
942   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
943   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
944 
945   // Note that the null check must have been done earlier.
946   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
947 
948   StringEqualsOptimizations optimizations(invoke);
949   if (!optimizations.GetArgumentNotNull()) {
950     // Check if input is null, return false if it is.
951     __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
952   }
953 
954   // Reference equality check, return true if same reference.
955   __ Cmp(str, arg);
956   __ B(eq, &return_true, /* is_far_target= */ false);
957 
958   if (!optimizations.GetArgumentIsString()) {
959     // Instanceof check for the argument by comparing class fields.
960     // All string objects must have the same type since String cannot be subclassed.
961     // Receiver must be a string object, so its class field is equal to all strings' class fields.
962     // If the argument is a string object, its class field must be equal to receiver's class field.
963     //
964     // As the String class is expected to be non-movable, we can read the class
965     // field from String.equals' arguments without read barriers.
966     AssertNonMovableStringClass();
967     // /* HeapReference<Class> */ temp = str->klass_
968     __ Ldr(temp, MemOperand(str, class_offset));
969     // /* HeapReference<Class> */ out = arg->klass_
970     __ Ldr(out, MemOperand(arg, class_offset));
971     // Also, because we use the previously loaded class references only in the
972     // following comparison, we don't need to unpoison them.
973     __ Cmp(temp, out);
974     __ B(ne, &return_false, /* is_far_target= */ false);
975   }
976 
977   // Check if one of the inputs is a const string. Do not special-case both strings
978   // being const, such cases should be handled by constant folding if needed.
979   uint32_t const_string_length = 0u;
980   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
981   if (const_string == nullptr) {
982     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
983     if (const_string != nullptr) {
984       std::swap(str, arg);  // Make sure the const string is in `str`.
985     }
986   }
987   bool is_compressed =
988       mirror::kUseStringCompression &&
989       const_string != nullptr &&
990       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
991 
992   if (const_string != nullptr) {
993     // Load `count` field of the argument string and check if it matches the const string.
994     // Also compares the compression style, if differs return false.
995     __ Ldr(temp, MemOperand(arg, count_offset));
996     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
997     __ B(ne, &return_false, /* is_far_target= */ false);
998   } else {
999     // Load `count` fields of this and argument strings.
1000     __ Ldr(temp, MemOperand(str, count_offset));
1001     __ Ldr(out, MemOperand(arg, count_offset));
1002     // Check if `count` fields are equal, return false if they're not.
1003     // Also compares the compression style, if differs return false.
1004     __ Cmp(temp, out);
1005     __ B(ne, &return_false, /* is_far_target= */ false);
1006   }
1007 
1008   // Assertions that must hold in order to compare strings 4 bytes at a time.
1009   // Ok to do this because strings are zero-padded to kObjectAlignment.
1010   DCHECK_ALIGNED(value_offset, 4);
1011   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1012 
1013   if (const_string != nullptr &&
1014       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1015                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1016     // Load and compare the contents. Though we know the contents of the short const string
1017     // at compile time, materializing constants may be more code than loading from memory.
1018     int32_t offset = value_offset;
1019     size_t remaining_bytes =
1020         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1021     while (remaining_bytes > sizeof(uint32_t)) {
1022       vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1023       UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1024       vixl32::Register temp2 = scratch_scope.Acquire();
1025       __ Ldrd(temp, temp1, MemOperand(str, offset));
1026       __ Ldrd(temp2, out, MemOperand(arg, offset));
1027       __ Cmp(temp, temp2);
1028       __ B(ne, &return_false, /* is_far_target= */ false);
1029       __ Cmp(temp1, out);
1030       __ B(ne, &return_false, /* is_far_target= */ false);
1031       offset += 2u * sizeof(uint32_t);
1032       remaining_bytes -= 2u * sizeof(uint32_t);
1033     }
1034     if (remaining_bytes != 0u) {
1035       __ Ldr(temp, MemOperand(str, offset));
1036       __ Ldr(out, MemOperand(arg, offset));
1037       __ Cmp(temp, out);
1038       __ B(ne, &return_false, /* is_far_target= */ false);
1039     }
1040   } else {
1041     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1042     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1043                   "Expecting 0=compressed, 1=uncompressed");
1044     __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1045 
1046     if (mirror::kUseStringCompression) {
1047       // For string compression, calculate the number of bytes to compare (not chars).
1048       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1049       __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1050       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1051                              2 * kMaxInstructionSizeInBytes,
1052                              CodeBufferCheckScope::kMaximumSize);
1053       __ it(cs);                                      // If uncompressed,
1054       __ add(cs, temp, temp, temp);                   //   double the byte count.
1055     }
1056 
1057     vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1058     UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1059     vixl32::Register temp2 = scratch_scope.Acquire();
1060 
1061     // Store offset of string value in preparation for comparison loop.
1062     __ Mov(temp1, value_offset);
1063 
1064     // Loop to compare strings 4 bytes at a time starting at the front of the string.
1065     __ Bind(&loop);
1066     __ Ldr(out, MemOperand(str, temp1));
1067     __ Ldr(temp2, MemOperand(arg, temp1));
1068     __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1069     __ Cmp(out, temp2);
1070     __ B(ne, &return_false, /* is_far_target= */ false);
1071     // With string compression, we have compared 4 bytes, otherwise 2 chars.
1072     __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1073     __ B(hi, &loop, /* is_far_target= */ false);
1074   }
1075 
1076   // Return true and exit the function.
1077   // If loop does not result in returning false, we return true.
1078   __ Bind(&return_true);
1079   __ Mov(out, 1);
1080   __ B(final_label);
1081 
1082   // Return false and exit the function.
1083   __ Bind(&return_false);
1084   __ Mov(out, 0);
1085 
1086   if (end.IsReferenced()) {
1087     __ Bind(&end);
1088   }
1089 }
1090 
GenerateVisitStringIndexOf(HInvoke * invoke,ArmVIXLAssembler * assembler,CodeGeneratorARMVIXL * codegen,bool start_at_zero)1091 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1092                                        ArmVIXLAssembler* assembler,
1093                                        CodeGeneratorARMVIXL* codegen,
1094                                        bool start_at_zero) {
1095   LocationSummary* locations = invoke->GetLocations();
1096 
1097   // Note that the null check must have been done earlier.
1098   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1099 
1100   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1101   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1102   SlowPathCodeARMVIXL* slow_path = nullptr;
1103   HInstruction* code_point = invoke->InputAt(1);
1104   if (code_point->IsIntConstant()) {
1105     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1106         std::numeric_limits<uint16_t>::max()) {
1107       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1108       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1109       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1110       codegen->AddSlowPath(slow_path);
1111       __ B(slow_path->GetEntryLabel());
1112       __ Bind(slow_path->GetExitLabel());
1113       return;
1114     }
1115   } else if (code_point->GetType() != DataType::Type::kUint16) {
1116     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1117     // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1118     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1119     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1120     codegen->AddSlowPath(slow_path);
1121     __ B(hs, slow_path->GetEntryLabel());
1122   }
1123 
1124   if (start_at_zero) {
1125     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1126     DCHECK(tmp_reg.Is(r2));
1127     // Start-index = 0.
1128     __ Mov(tmp_reg, 0);
1129   }
1130 
1131   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1132   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1133 
1134   if (slow_path != nullptr) {
1135     __ Bind(slow_path->GetExitLabel());
1136   }
1137 }
1138 
VisitStringIndexOf(HInvoke * invoke)1139 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1140   LocationSummary* locations = new (allocator_) LocationSummary(
1141       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1142   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1143   // best to align the inputs accordingly.
1144   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1145   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1146   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1147   locations->SetOut(LocationFrom(r0));
1148 
1149   // Need to send start-index=0.
1150   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1151 }
1152 
1153 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1154   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1155 }
1156 
1157 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1158   LocationSummary* locations = new (allocator_) LocationSummary(
1159       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1160   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1161   // best to align the inputs accordingly.
1162   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1163   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1164   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1165   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1166   locations->SetOut(LocationFrom(r0));
1167 }
1168 
1169 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1170   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1171 }
1172 
1173 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1174   LocationSummary* locations = new (allocator_) LocationSummary(
1175       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1176   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1177   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1178   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1179   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1180   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1181   locations->SetOut(LocationFrom(r0));
1182 }
1183 
1184 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1185   ArmVIXLAssembler* assembler = GetAssembler();
1186   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1187   __ Cmp(byte_array, 0);
1188   SlowPathCodeARMVIXL* slow_path =
1189       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1190   codegen_->AddSlowPath(slow_path);
1191   __ B(eq, slow_path->GetEntryLabel());
1192 
1193   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1194   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1195   __ Bind(slow_path->GetExitLabel());
1196 }
1197 
1198 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1199   LocationSummary* locations =
1200       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1201   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1202   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1203   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1204   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1205   locations->SetOut(LocationFrom(r0));
1206 }
1207 
1208 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1209   // No need to emit code checking whether `locations->InAt(2)` is a null
1210   // pointer, as callers of the native method
1211   //
1212   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1213   //
1214   // all include a null check on `data` before calling that method.
1215   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1216   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1217 }
1218 
1219 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1220   LocationSummary* locations = new (allocator_) LocationSummary(
1221       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1222   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1223   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1224   locations->SetOut(LocationFrom(r0));
1225 }
1226 
1227 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1228   ArmVIXLAssembler* assembler = GetAssembler();
1229   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1230   __ Cmp(string_to_copy, 0);
1231   SlowPathCodeARMVIXL* slow_path =
1232       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1233   codegen_->AddSlowPath(slow_path);
1234   __ B(eq, slow_path->GetEntryLabel());
1235 
1236   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1237   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1238 
1239   __ Bind(slow_path->GetExitLabel());
1240 }
1241 
1242 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1243   // The only read barrier implementation supporting the
1244   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1245   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1246     return;
1247   }
1248 
1249   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1250   LocationSummary* locations = invoke->GetLocations();
1251   if (locations == nullptr) {
1252     return;
1253   }
1254 
1255   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1256   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1257   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1258 
1259   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1260     locations->SetInAt(1, Location::RequiresRegister());
1261   }
1262   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1263     locations->SetInAt(3, Location::RequiresRegister());
1264   }
1265   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1266     locations->SetInAt(4, Location::RequiresRegister());
1267   }
1268   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1269     // Temporary register IP cannot be used in
1270     // ReadBarrierSystemArrayCopySlowPathARM (because that register
1271     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1272     // temporary register from the register allocator.
1273     locations->AddTemp(Location::RequiresRegister());
1274   }
1275 }
1276 
1277 static void CheckPosition(ArmVIXLAssembler* assembler,
1278                           Location pos,
1279                           vixl32::Register input,
1280                           Location length,
1281                           SlowPathCodeARMVIXL* slow_path,
1282                           vixl32::Register temp,
1283                           bool length_is_input_length = false) {
1284   // Where is the length in the Array?
1285   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1286 
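  // Roughly, the cases below amount to (with `input_len` denoting the array length field):
  //   pos >= 0 && pos <= input_len && (input_len - pos) >= length
  // branching to the slow path as soon as one of these conditions fails.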
1287   if (pos.IsConstant()) {
1288     int32_t pos_const = Int32ConstantFrom(pos);
1289     if (pos_const == 0) {
1290       if (!length_is_input_length) {
1291         // Check that length(input) >= length.
1292         __ Ldr(temp, MemOperand(input, length_offset));
1293         if (length.IsConstant()) {
1294           __ Cmp(temp, Int32ConstantFrom(length));
1295         } else {
1296           __ Cmp(temp, RegisterFrom(length));
1297         }
1298         __ B(lt, slow_path->GetEntryLabel());
1299       }
1300     } else {
1301       // Check that length(input) >= pos.
1302       __ Ldr(temp, MemOperand(input, length_offset));
1303       __ Subs(temp, temp, pos_const);
1304       __ B(lt, slow_path->GetEntryLabel());
1305 
1306       // Check that (length(input) - pos) >= length.
1307       if (length.IsConstant()) {
1308         __ Cmp(temp, Int32ConstantFrom(length));
1309       } else {
1310         __ Cmp(temp, RegisterFrom(length));
1311       }
1312       __ B(lt, slow_path->GetEntryLabel());
1313     }
1314   } else if (length_is_input_length) {
1315     // The only way the copy can succeed is if pos is zero.
1316     vixl32::Register pos_reg = RegisterFrom(pos);
1317     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1318   } else {
1319     // Check that pos >= 0.
1320     vixl32::Register pos_reg = RegisterFrom(pos);
1321     __ Cmp(pos_reg, 0);
1322     __ B(lt, slow_path->GetEntryLabel());
1323 
1324     // Check that pos <= length(input).
1325     __ Ldr(temp, MemOperand(input, length_offset));
1326     __ Subs(temp, temp, pos_reg);
1327     __ B(lt, slow_path->GetEntryLabel());
1328 
1329     // Check that (length(input) - pos) >= length.
1330     if (length.IsConstant()) {
1331       __ Cmp(temp, Int32ConstantFrom(length));
1332     } else {
1333       __ Cmp(temp, RegisterFrom(length));
1334     }
1335     __ B(lt, slow_path->GetEntryLabel());
1336   }
1337 }
1338 
1339 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1340   // The only read barrier implementation supporting the
1341   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1342   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1343 
1344   ArmVIXLAssembler* assembler = GetAssembler();
1345   LocationSummary* locations = invoke->GetLocations();
1346 
1347   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1348   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1349   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1350   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1351   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1352 
1353   vixl32::Register src = InputRegisterAt(invoke, 0);
1354   Location src_pos = locations->InAt(1);
1355   vixl32::Register dest = InputRegisterAt(invoke, 2);
1356   Location dest_pos = locations->InAt(3);
1357   Location length = locations->InAt(4);
1358   Location temp1_loc = locations->GetTemp(0);
1359   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1360   Location temp2_loc = locations->GetTemp(1);
1361   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1362   Location temp3_loc = locations->GetTemp(2);
1363   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1364 
1365   SlowPathCodeARMVIXL* intrinsic_slow_path =
1366       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1367   codegen_->AddSlowPath(intrinsic_slow_path);
1368 
1369   vixl32::Label conditions_on_positions_validated;
1370   SystemArrayCopyOptimizations optimizations(invoke);
1371 
1372   // If source and destination are the same, we go to the slow path if the data would have to
1373   // move forward (i.e. dest_pos > src_pos), since the copy below goes from low to high addresses.
1374   if (src_pos.IsConstant()) {
1375     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1376     if (dest_pos.IsConstant()) {
1377       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1378       if (optimizations.GetDestinationIsSource()) {
1379         // Checked when building locations.
1380         DCHECK_GE(src_pos_constant, dest_pos_constant);
1381       } else if (src_pos_constant < dest_pos_constant) {
1382         __ Cmp(src, dest);
1383         __ B(eq, intrinsic_slow_path->GetEntryLabel());
1384       }
1385 
1386       // Checked when building locations.
1387       DCHECK(!optimizations.GetDestinationIsSource()
1388              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1389     } else {
1390       if (!optimizations.GetDestinationIsSource()) {
1391         __ Cmp(src, dest);
1392         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1393       }
1394       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1395       __ B(gt, intrinsic_slow_path->GetEntryLabel());
1396     }
1397   } else {
1398     if (!optimizations.GetDestinationIsSource()) {
1399       __ Cmp(src, dest);
1400       __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1401     }
1402     if (dest_pos.IsConstant()) {
1403       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1404       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1405     } else {
1406       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1407     }
1408     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1409   }
1410 
1411   __ Bind(&conditions_on_positions_validated);
1412 
1413   if (!optimizations.GetSourceIsNotNull()) {
1414     // Bail out if the source is null.
1415     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1416   }
1417 
1418   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1419     // Bail out if the destination is null.
1420     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1421   }
1422 
1423   // If the length is negative, bail out.
1424   // We have already checked in the LocationsBuilder for the constant case.
1425   if (!length.IsConstant() &&
1426       !optimizations.GetCountIsSourceLength() &&
1427       !optimizations.GetCountIsDestinationLength()) {
1428     __ Cmp(RegisterFrom(length), 0);
1429     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1430   }
1431 
1432   // Validity checks: source.
1433   CheckPosition(assembler,
1434                 src_pos,
1435                 src,
1436                 length,
1437                 intrinsic_slow_path,
1438                 temp1,
1439                 optimizations.GetCountIsSourceLength());
1440 
1441   // Validity checks: dest.
1442   CheckPosition(assembler,
1443                 dest_pos,
1444                 dest,
1445                 length,
1446                 intrinsic_slow_path,
1447                 temp1,
1448                 optimizations.GetCountIsDestinationLength());
1449 
1450   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1451     // Check whether all elements of the source array are assignable to the component
1452     // type of the destination array. We do two checks: the classes are the same,
1453     // or the destination is Object[]. If none of these checks succeed, we go to the
1454     // slow path.
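    // In pseudo-code, the check is roughly:
    //   if (src->klass != dest->klass &&
    //       dest->klass->component_type->super_class != null) {  // i.e. dest is not Object[]
    //     goto slow_path;
    //   }
    // with additional bail-outs when either array might be a primitive array.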
1455 
1456     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1457       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1458         // /* HeapReference<Class> */ temp1 = src->klass_
1459         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1460             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1461         // Bail out if the source is not a non primitive array.
1462         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1463         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1464             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1465         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1466         // If heap poisoning is enabled, `temp1` has been unpoisoned
1467         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1468         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1469         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1470         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1471         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1472       }
1473 
1474       // /* HeapReference<Class> */ temp1 = dest->klass_
1475       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1476           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1477 
1478       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1479         // Bail out if the destination is not a non primitive array.
1480         //
1481         // Register `temp1` is not trashed by the read barrier emitted
1482         // by GenerateFieldLoadWithBakerReadBarrier below, as that
1483         // method produces a call to a ReadBarrierMarkRegX entry point,
1484         // which saves all potentially live registers, including
1485         // temporaries such as `temp1`.
1486         // /* HeapReference<Class> */ temp2 = temp1->component_type_
1487         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1488             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
1489         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
1490         // If heap poisoning is enabled, `temp2` has been unpoisoned
1491         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1492         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1493         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1494         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1495         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
1496       }
1497 
1498       // For the same reason given earlier, `temp1` is not trashed by the
1499       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1500       // /* HeapReference<Class> */ temp2 = src->klass_
1501       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1502           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1503       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1504       __ Cmp(temp1, temp2);
1505 
1506       if (optimizations.GetDestinationIsTypedObjectArray()) {
1507         vixl32::Label do_copy;
1508         __ B(eq, &do_copy, /* is_far_target= */ false);
1509         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1510         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1511             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1512         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1513         // We do not need to emit a read barrier for the following
1514         // heap reference load, as `temp1` is only used in a
1515         // comparison with null below, and this reference is not
1516         // kept afterwards.
1517         __ Ldr(temp1, MemOperand(temp1, super_offset));
1518         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1519         __ Bind(&do_copy);
1520       } else {
1521         __ B(ne, intrinsic_slow_path->GetEntryLabel());
1522       }
1523     } else {
1524       // Non read barrier code.
1525 
1526       // /* HeapReference<Class> */ temp1 = dest->klass_
1527       __ Ldr(temp1, MemOperand(dest, class_offset));
1528       // /* HeapReference<Class> */ temp2 = src->klass_
1529       __ Ldr(temp2, MemOperand(src, class_offset));
1530       bool did_unpoison = false;
1531       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1532           !optimizations.GetSourceIsNonPrimitiveArray()) {
1533         // One or two of the references need to be unpoisoned. Unpoison them
1534         // both to make the identity check valid.
1535         assembler->MaybeUnpoisonHeapReference(temp1);
1536         assembler->MaybeUnpoisonHeapReference(temp2);
1537         did_unpoison = true;
1538       }
1539 
1540       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1541         // Bail out if the destination is not a non primitive array.
1542         // /* HeapReference<Class> */ temp3 = temp1->component_type_
1543         __ Ldr(temp3, MemOperand(temp1, component_offset));
1544         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1545         assembler->MaybeUnpoisonHeapReference(temp3);
1546         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1547         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1548         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1549         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1550       }
1551 
1552       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1553         // Bail out if the source is not a non primitive array.
1554         // /* HeapReference<Class> */ temp3 = temp2->component_type_
1555         __ Ldr(temp3, MemOperand(temp2, component_offset));
1556         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1557         assembler->MaybeUnpoisonHeapReference(temp3);
1558         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1559         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1560         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1561         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1562       }
1563 
1564       __ Cmp(temp1, temp2);
1565 
1566       if (optimizations.GetDestinationIsTypedObjectArray()) {
1567         vixl32::Label do_copy;
1568         __ B(eq, &do_copy, /* is_far_target= */ false);
1569         if (!did_unpoison) {
1570           assembler->MaybeUnpoisonHeapReference(temp1);
1571         }
1572         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1573         __ Ldr(temp1, MemOperand(temp1, component_offset));
1574         assembler->MaybeUnpoisonHeapReference(temp1);
1575         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1576         __ Ldr(temp1, MemOperand(temp1, super_offset));
1577         // No need to unpoison the result, we're comparing against null.
1578         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1579         __ Bind(&do_copy);
1580       } else {
1581         __ B(ne, intrinsic_slow_path->GetEntryLabel());
1582       }
1583     }
1584   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1585     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1586     // Bail out if the source is not a non primitive array.
1587     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1588       // /* HeapReference<Class> */ temp1 = src->klass_
1589       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1590           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1591       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1592       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1593           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1594       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1595       // If heap poisoning is enabled, `temp3` has been unpoisoned
1596       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1597     } else {
1598       // /* HeapReference<Class> */ temp1 = src->klass_
1599       __ Ldr(temp1, MemOperand(src, class_offset));
1600       assembler->MaybeUnpoisonHeapReference(temp1);
1601       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1602       __ Ldr(temp3, MemOperand(temp1, component_offset));
1603       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1604       assembler->MaybeUnpoisonHeapReference(temp3);
1605     }
1606     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1607     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1608     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1609     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1610   }
1611 
1612   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1613     // Zero constant length: no need to emit the loop code at all.
1614   } else {
1615     vixl32::Label done;
1616     const DataType::Type type = DataType::Type::kReference;
1617     const int32_t element_size = DataType::Size(type);
1618 
1619     if (length.IsRegister()) {
1620       // Don't enter the copy loop if the length is zero.
1621       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
1622     }
1623 
1624     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1625       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1626 
1627       // SystemArrayCopy implementation for Baker read barriers (see
1628       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1629       //
1630       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1631       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1632       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1633       //   if (is_gray) {
1634       //     // Slow-path copy.
1635       //     do {
1636       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1637       //     } while (src_ptr != end_ptr)
1638       //   } else {
1639       //     // Fast-path copy.
1640       //     do {
1641       //       *dest_ptr++ = *src_ptr++;
1642       //     } while (src_ptr != end_ptr)
1643       //   }
1644 
1645       // /* int32_t */ monitor = src->monitor_
1646       __ Ldr(temp2, MemOperand(src, monitor_offset));
1647       // /* LockWord */ lock_word = LockWord(monitor)
1648       static_assert(sizeof(LockWord) == sizeof(int32_t),
1649                     "art::LockWord and int32_t have different sizes.");
1650 
1651       // Introduce a dependency on the lock_word including the rb_state,
1652       // which shall prevent load-load reordering without using
1653       // a memory barrier (which would be more expensive).
1654       // `src` is unchanged by this operation, but its value now depends
1655       // on `temp2`.
1656       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
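      // (A logical shift right by 32 yields 0, so this ADD leaves `src` numerically unchanged
      // while still consuming `temp2`.)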
1657 
1658       // Compute the base source address in `temp1`.
1659       // Note that `temp1` (the base source address) is computed from
1660       // `src` (and `src_pos`) here, and thus honors the artificial
1661       // dependency of `src` on `temp2`.
1662       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1663       // Compute the end source address in `temp3`.
1664       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1665       // The base destination address is computed later, as `temp2` is
1666       // used for intermediate computations.
1667 
1668       // Slow path used to copy array when `src` is gray.
1669       // Note that the base destination address is computed in `temp2`
1670       // by the slow path code.
1671       SlowPathCodeARMVIXL* read_barrier_slow_path =
1672           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1673       codegen_->AddSlowPath(read_barrier_slow_path);
1674 
1675       // Given the numeric representation, it's enough to check the low bit of the
1676       // rb_state. We do that by shifting the bit out of the lock word with LSRS
1677       // which can be a 16-bit instruction unlike the TST immediate.
1678       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1679       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1680       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
1681       // Carry flag is the last bit shifted out by LSRS.
1682       __ B(cs, read_barrier_slow_path->GetEntryLabel());
1683 
1684       // Fast-path copy.
1685       // Compute the base destination address in `temp2`.
1686       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1687       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1688       // poison/unpoison.
1689       vixl32::Label loop;
1690       __ Bind(&loop);
1691       {
1692         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1693         const vixl32::Register temp_reg = temps.Acquire();
1694         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1695         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1696       }
1697       __ Cmp(temp1, temp3);
1698       __ B(ne, &loop, /* is_far_target= */ false);
1699 
1700       __ Bind(read_barrier_slow_path->GetExitLabel());
1701     } else {
1702       // Non read barrier code.
1703       // Compute the base source address in `temp1`.
1704       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1705       // Compute the base destination address in `temp2`.
1706       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1707       // Compute the end source address in `temp3`.
1708       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1709       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1710       // poison/unpoison.
1711       vixl32::Label loop;
1712       __ Bind(&loop);
1713       {
1714         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1715         const vixl32::Register temp_reg = temps.Acquire();
1716         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1717         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1718       }
1719       __ Cmp(temp1, temp3);
1720       __ B(ne, &loop, /* is_far_target= */ false);
1721     }
1722     __ Bind(&done);
1723   }
1724 
1725   // We only need one card marking on the destination array.
1726   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false);
1727 
1728   __ Bind(intrinsic_slow_path->GetExitLabel());
1729 }
1730 
1731 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1732   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1733   // the code generator. Furthermore, the register allocator creates fixed live intervals
1734   // for all caller-saved registers because we are doing a function call. As a result, if
1735   // the input and output locations are unallocated, the register allocator runs out of
1736   // registers and fails; however, a debuggable graph is not the common case.
1737   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1738     return;
1739   }
1740 
1741   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1742   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1743   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1744 
1745   LocationSummary* const locations =
1746       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1747   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1748 
1749   locations->SetInAt(0, Location::RequiresFpuRegister());
1750   locations->SetOut(Location::RequiresFpuRegister());
1751   // Native code uses the soft float ABI.
1752   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1753   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1754 }
1755 
1756 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1757   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1758   // the code generator. Furthermore, the register allocator creates fixed live intervals
1759   // for all caller-saved registers because we are doing a function call. As a result, if
1760   // the input and output locations are unallocated, the register allocator runs out of
1761   // registers and fails; however, a debuggable graph is not the common case.
1762   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1763     return;
1764   }
1765 
1766   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1767   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1768   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1769   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1770 
1771   LocationSummary* const locations =
1772       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1773   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1774 
1775   locations->SetInAt(0, Location::RequiresFpuRegister());
1776   locations->SetInAt(1, Location::RequiresFpuRegister());
1777   locations->SetOut(Location::RequiresFpuRegister());
1778   // Native code uses the soft float ABI.
1779   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1780   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1781   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1782   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1783 }
1784 
1785 static void GenFPToFPCall(HInvoke* invoke,
1786                           ArmVIXLAssembler* assembler,
1787                           CodeGeneratorARMVIXL* codegen,
1788                           QuickEntrypointEnum entry) {
1789   LocationSummary* const locations = invoke->GetLocations();
1790 
1791   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1792   DCHECK(locations->WillCall() && locations->Intrinsified());
1793 
1794   // Native code uses the soft float ABI.
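  // The double argument is moved to a core register pair for the call and the double result is
  // moved back afterwards; with the runtime calling convention used here, these temps are
  // expected to be r0/r1.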
1795   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1796           RegisterFrom(locations->GetTemp(1)),
1797           InputDRegisterAt(invoke, 0));
1798   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1799   __ Vmov(OutputDRegister(invoke),
1800           RegisterFrom(locations->GetTemp(0)),
1801           RegisterFrom(locations->GetTemp(1)));
1802 }
1803 
1804 static void GenFPFPToFPCall(HInvoke* invoke,
1805                             ArmVIXLAssembler* assembler,
1806                             CodeGeneratorARMVIXL* codegen,
1807                             QuickEntrypointEnum entry) {
1808   LocationSummary* const locations = invoke->GetLocations();
1809 
1810   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1811   DCHECK(locations->WillCall() && locations->Intrinsified());
1812 
1813   // Native code uses the soft float ABI.
1814   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1815           RegisterFrom(locations->GetTemp(1)),
1816           InputDRegisterAt(invoke, 0));
1817   __ Vmov(RegisterFrom(locations->GetTemp(2)),
1818           RegisterFrom(locations->GetTemp(3)),
1819           InputDRegisterAt(invoke, 1));
1820   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1821   __ Vmov(OutputDRegister(invoke),
1822           RegisterFrom(locations->GetTemp(0)),
1823           RegisterFrom(locations->GetTemp(1)));
1824 }
1825 
1826 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1827   CreateFPToFPCallLocations(allocator_, invoke);
1828 }
1829 
1830 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1831   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1832 }
1833 
1834 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1835   CreateFPToFPCallLocations(allocator_, invoke);
1836 }
1837 
1838 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1839   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1840 }
1841 
1842 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1843   CreateFPToFPCallLocations(allocator_, invoke);
1844 }
1845 
1846 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1847   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1848 }
1849 
1850 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1851   CreateFPToFPCallLocations(allocator_, invoke);
1852 }
1853 
1854 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1855   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1856 }
1857 
1858 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1859   CreateFPToFPCallLocations(allocator_, invoke);
1860 }
1861 
1862 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1863   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1864 }
1865 
1866 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1867   CreateFPToFPCallLocations(allocator_, invoke);
1868 }
1869 
1870 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1871   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1872 }
1873 
1874 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1875   CreateFPToFPCallLocations(allocator_, invoke);
1876 }
1877 
1878 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1879   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1880 }
1881 
1882 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1883   CreateFPToFPCallLocations(allocator_, invoke);
1884 }
1885 
1886 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1887   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1888 }
1889 
1890 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1891   CreateFPToFPCallLocations(allocator_, invoke);
1892 }
1893 
1894 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1895   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1896 }
1897 
1898 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1899   CreateFPToFPCallLocations(allocator_, invoke);
1900 }
1901 
1902 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1903   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1904 }
1905 
1906 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1907   CreateFPToFPCallLocations(allocator_, invoke);
1908 }
1909 
1910 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1911   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1912 }
1913 
1914 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1915   CreateFPToFPCallLocations(allocator_, invoke);
1916 }
1917 
1918 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1919   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1920 }
1921 
1922 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1923   CreateFPToFPCallLocations(allocator_, invoke);
1924 }
1925 
1926 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1927   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1928 }
1929 
1930 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1931   CreateFPToFPCallLocations(allocator_, invoke);
1932 }
1933 
1934 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1935   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1936 }
1937 
1938 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1939   CreateFPFPToFPCallLocations(allocator_, invoke);
1940 }
1941 
1942 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1943   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1944 }
1945 
1946 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1947   CreateFPFPToFPCallLocations(allocator_, invoke);
1948 }
1949 
1950 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1951   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1952 }
1953 
1954 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1955   CreateFPFPToFPCallLocations(allocator_, invoke);
1956 }
1957 
1958 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1959   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1960 }
1961 
1962 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1963   CreateFPFPToFPCallLocations(allocator_, invoke);
1964 }
1965 
1966 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1967   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1968 }
1969 
1970 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1971   CreateIntToIntLocations(allocator_, invoke);
1972 }
1973 
1974 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1975   ArmVIXLAssembler* assembler = GetAssembler();
1976   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1977 }
1978 
1979 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1980   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1981 }
1982 
1983 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1984   ArmVIXLAssembler* assembler = GetAssembler();
1985   LocationSummary* locations = invoke->GetLocations();
1986 
1987   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
1988   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
1989   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1990   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1991 
1992   __ Rbit(out_reg_lo, in_reg_hi);
1993   __ Rbit(out_reg_hi, in_reg_lo);
1994 }
1995 
1996 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1997   DCHECK(pair.IsRegisterPair());
1998   __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1999   __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
2000 }
2001 
2002 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
2003                                  DataType::Type type,
2004                                  Location in,
2005                                  Location out) {
2006   switch (type) {
2007     case DataType::Type::kUint16:
2008       __ Rev16(RegisterFrom(out), RegisterFrom(in));
2009       break;
2010     case DataType::Type::kInt16:
2011       __ Revsh(RegisterFrom(out), RegisterFrom(in));
2012       break;
2013     case DataType::Type::kInt32:
2014       __ Rev(RegisterFrom(out), RegisterFrom(in));
2015       break;
2016     case DataType::Type::kInt64:
2017       DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
2018       __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
2019       __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
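      // e.g. hi:lo = 0x01234567:0x89ABCDEF reverses to hi:lo = 0xEFCDAB89:0x67452301,
      // i.e. 0x0123456789ABCDEF -> 0xEFCDAB8967452301.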
2020       break;
2021     case DataType::Type::kFloat32:
2022       __ Rev(RegisterFrom(in), RegisterFrom(in));  // Note: Clobbers `in`.
2023       __ Vmov(SRegisterFrom(out), RegisterFrom(in));
2024       break;
2025     case DataType::Type::kFloat64:
2026       GenerateReverseBytesInPlaceForEachWord(assembler, in);  // Note: Clobbers `in`.
2027       __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in));  // Swap high/low.
2028       break;
2029     default:
2030       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
2031       UNREACHABLE();
2032   }
2033 }
2034 
2035 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2036   CreateIntToIntLocations(allocator_, invoke);
2037 }
2038 
2039 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2040   ArmVIXLAssembler* assembler = GetAssembler();
2041   LocationSummary* locations = invoke->GetLocations();
2042   GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
2043 }
2044 
2045 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2046   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2047 }
2048 
2049 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2050   ArmVIXLAssembler* assembler = GetAssembler();
2051   LocationSummary* locations = invoke->GetLocations();
2052   GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
2053 }
2054 
2055 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2056   CreateIntToIntLocations(allocator_, invoke);
2057 }
2058 
2059 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2060   ArmVIXLAssembler* assembler = GetAssembler();
2061   LocationSummary* locations = invoke->GetLocations();
2062   GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
2063 }
2064 
2065 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2066   DCHECK(DataType::IsIntOrLongType(type)) << type;
2067   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2068   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2069 
2070   bool is_long = type == DataType::Type::kInt64;
2071   LocationSummary* locations = instr->GetLocations();
2072   Location in = locations->InAt(0);
2073   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2074   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2075   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2076   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2077   vixl32::Register  out_r = OutputRegister(instr);
2078 
2079   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2080   // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
2081   // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
2082   // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
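  // As an example, src_0 = 0x000000F0 (4 bits set) yields byte counts |0|0|0|4| in the low word
  // after Vcnt, and the pairwise additions below fold these into a single count of 4 in the low
  // lane, which is then moved back to the core register.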
2083   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2084   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2085   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2086   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2087   if (is_long) {
2088     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2089   }
2090   __ Vmov(out_r, tmp_s);
2091 }
2092 
2093 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2094   CreateIntToIntLocations(allocator_, invoke);
2095   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2096 }
2097 
2098 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2099   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2100 }
2101 
2102 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2103   VisitIntegerBitCount(invoke);
2104 }
2105 
2106 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2107   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2108 }
2109 
2110 static void GenHighestOneBit(HInvoke* invoke,
2111                              DataType::Type type,
2112                              CodeGeneratorARMVIXL* codegen) {
2113   DCHECK(DataType::IsIntOrLongType(type));
2114 
2115   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2116   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2117   const vixl32::Register temp = temps.Acquire();
2118 
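  // The highest set bit is isolated as 0x80000000 >> CLZ(x); e.g. x = 0x00001234 has CLZ(x) = 19
  // and 0x80000000 >> 19 == 0x00001000. For x == 0 the shift amount is 32, which produces 0.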
2119   if (type == DataType::Type::kInt64) {
2120     LocationSummary* locations = invoke->GetLocations();
2121     Location in = locations->InAt(0);
2122     Location out = locations->Out();
2123 
2124     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2125     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2126     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2127     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2128 
2129     __ Mov(temp, 0x80000000);  // Modified immediate.
2130     __ Clz(out_reg_lo, in_reg_lo);
2131     __ Clz(out_reg_hi, in_reg_hi);
2132     __ Lsr(out_reg_lo, temp, out_reg_lo);
2133     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2134 
2135     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2136     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2137     // we check that the output is in a low register, so that a 16-bit MOV
2138     // encoding can be used. If output is in a high register, then we generate
2139     // 4 more bytes of code to avoid a branch.
2140     Operand mov_src(0);
2141     if (!out_reg_lo.IsLow()) {
2142       __ Mov(LeaveFlags, temp, 0);
2143       mov_src = Operand(temp);
2144     }
2145     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2146                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2147                                   CodeBufferCheckScope::kExactSize);
2148     __ it(ne);
2149     __ mov(ne, out_reg_lo, mov_src);
2150   } else {
2151     vixl32::Register out = OutputRegister(invoke);
2152     vixl32::Register in = InputRegisterAt(invoke, 0);
2153 
2154     __ Mov(temp, 0x80000000);  // Modified immediate.
2155     __ Clz(out, in);
2156     __ Lsr(out, temp, out);
2157   }
2158 }
2159 
2160 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2161   CreateIntToIntLocations(allocator_, invoke);
2162 }
2163 
2164 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2165   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2166 }
2167 
2168 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2169   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2170 }
2171 
2172 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2173   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2174 }
2175 
2176 static void GenLowestOneBit(HInvoke* invoke,
2177                             DataType::Type type,
2178                             CodeGeneratorARMVIXL* codegen) {
2179   DCHECK(DataType::IsIntOrLongType(type));
2180 
2181   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2182   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2183   const vixl32::Register temp = temps.Acquire();
2184 
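  // The lowest set bit is isolated as x & -x; e.g. x = 0x68 gives -x = 0xFFFFFF98 and
  // x & -x = 0x08. For the 64-bit case the trick is applied to each half separately and the
  // high half's result is discarded below whenever the low half is non-zero.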
2185   if (type == DataType::Type::kInt64) {
2186     LocationSummary* locations = invoke->GetLocations();
2187     Location in = locations->InAt(0);
2188     Location out = locations->Out();
2189 
2190     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2191     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2192     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2193     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2194 
2195     __ Rsb(out_reg_hi, in_reg_hi, 0);
2196     __ Rsb(out_reg_lo, in_reg_lo, 0);
2197     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2198     // The result of this operation is 0 iff in_reg_lo is 0
2199     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2200 
2201     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2202     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2203     // we check that the output is in a low register, so that a 16-bit MOV
2204     // encoding can be used. If output is in a high register, then we generate
2205     // 4 more bytes of code to avoid a branch.
2206     Operand mov_src(0);
2207     if (!out_reg_lo.IsLow()) {
2208       __ Mov(LeaveFlags, temp, 0);
2209       mov_src = Operand(temp);
2210     }
2211     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2212                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2213                                   CodeBufferCheckScope::kExactSize);
2214     __ it(ne);
2215     __ mov(ne, out_reg_hi, mov_src);
2216   } else {
2217     vixl32::Register out = OutputRegister(invoke);
2218     vixl32::Register in = InputRegisterAt(invoke, 0);
2219 
2220     __ Rsb(temp, in, 0);
2221     __ And(out, temp, in);
2222   }
2223 }
2224 
2225 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2226   CreateIntToIntLocations(allocator_, invoke);
2227 }
2228 
2229 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2230   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2231 }
2232 
2233 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2234   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2235 }
2236 
2237 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2238   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2239 }
2240 
2241 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2242   LocationSummary* locations =
2243       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2244   locations->SetInAt(0, Location::RequiresRegister());
2245   locations->SetInAt(1, Location::RequiresRegister());
2246   locations->SetInAt(2, Location::RequiresRegister());
2247   locations->SetInAt(3, Location::RequiresRegister());
2248   locations->SetInAt(4, Location::RequiresRegister());
2249 
2250   // Temporary registers to store lengths of strings and for calculations.
2251   locations->AddTemp(Location::RequiresRegister());
2252   locations->AddTemp(Location::RequiresRegister());
2253   locations->AddTemp(Location::RequiresRegister());
2254 }
2255 
2256 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2257   ArmVIXLAssembler* assembler = GetAssembler();
2258   LocationSummary* locations = invoke->GetLocations();
2259 
2260   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2261   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2262   DCHECK_EQ(char_size, 2u);
2263 
2264   // Location of data in char array buffer.
2265   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2266 
2267   // Location of char array data in string.
2268   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2269 
2270   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2271   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2272   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2273   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2274   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2275   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2276   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2277 
2278   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2279   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2280   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2281 
2282   vixl32::Label done, compressed_string_loop;
2283   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2284   // Compute the base address of the dst region to copy into.
2285   __ Add(dst_ptr, dstObj, data_offset);
2286   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2287 
2288   __ Subs(num_chr, srcEnd, srcBegin);
2289   // Early out for valid zero-length retrievals.
2290   __ B(eq, final_label, /* is_far_target= */ false);
2291 
2292   // Compute the base address of the src range to copy.
2293   __ Add(src_ptr, srcObj, value_offset);
2294 
2295   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2296   vixl32::Register temp;
2297   vixl32::Label compressed_string_preloop;
2298   if (mirror::kUseStringCompression) {
2299     // Location of count in string.
2300     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2301     temp = temps.Acquire();
2302     // String's length.
2303     __ Ldr(temp, MemOperand(srcObj, count_offset));
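    // The least significant bit of the count field is the compression flag; when it is clear
    // the string data is stored as 8-bit characters and we take the byte-expanding loop below.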
2304     __ Tst(temp, 1);
2305     temps.Release(temp);
2306     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2307   }
2308   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2309 
2310   // Do the copy.
2311   vixl32::Label loop, remainder;
2312 
2313   temp = temps.Acquire();
2314   // Subtract into a temp so num_chr does not need repairing on the < 4 character path.
2315   __ Subs(temp, num_chr, 4);
2316   __ B(lt, &remainder, /* is_far_target= */ false);
2317 
2318   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2319   __ Mov(num_chr, temp);
2320 
2321   // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
2322   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2323   // to rectify these everywhere this intrinsic applies.)
2324   __ Bind(&loop);
2325   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2326   __ Subs(num_chr, num_chr, 4);
2327   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2328   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2329   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2330   temps.Release(temp);
2331   __ B(ge, &loop, /* is_far_target= */ false);
2332 
2333   __ Adds(num_chr, num_chr, 4);
2334   __ B(eq, final_label, /* is_far_target= */ false);
2335 
2336   // Main loop for < 4 character case and remainder handling. Loads and stores one
2337   // 16-bit Java character at a time.
2338   __ Bind(&remainder);
2339   temp = temps.Acquire();
2340   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2341   __ Subs(num_chr, num_chr, 1);
2342   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2343   temps.Release(temp);
2344   __ B(gt, &remainder, /* is_far_target= */ false);
2345 
2346   if (mirror::kUseStringCompression) {
2347     __ B(final_label);
2348 
2349     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2350     DCHECK_EQ(c_char_size, 1u);
2351     // Copy loop for compressed src, expanding each 8-bit character to a 16-bit character.
2352     __ Bind(&compressed_string_preloop);
2353     __ Add(src_ptr, src_ptr, srcBegin);
2354     __ Bind(&compressed_string_loop);
2355     temp = temps.Acquire();
2356     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2357     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2358     temps.Release(temp);
2359     __ Subs(num_chr, num_chr, 1);
2360     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2361   }
2362 
2363   if (done.IsReferenced()) {
2364     __ Bind(&done);
2365   }
2366 }
2367 
2368 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2369   CreateFPToIntLocations(allocator_, invoke);
2370 }
2371 
2372 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2373   ArmVIXLAssembler* const assembler = GetAssembler();
2374   const vixl32::Register out = OutputRegister(invoke);
2375   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2376   // we don't care about the sign bit anyway.
2377   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
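  // For example, 0x7f800000 (the bits of +Inf) becomes 0xff000000 after the shift, which is
  // encodable as a Thumb-2 modified immediate.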
2378 
2379   __ Vmov(out, InputSRegisterAt(invoke, 0));
2380   // We don't care about the sign bit, so shift left.
2381   __ Lsl(out, out, 1);
2382   __ Eor(out, out, infinity);
2383   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2384 }
2385 
2386 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2387   CreateFPToIntLocations(allocator_, invoke);
2388 }
2389 
2390 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2391   ArmVIXLAssembler* const assembler = GetAssembler();
2392   const vixl32::Register out = OutputRegister(invoke);
2393   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2394   const vixl32::Register temp = temps.Acquire();
2395   // The highest 32 bits of double precision positive infinity separated into
2396   // two constants encodable as immediate operands.
2397   constexpr uint32_t infinity_high  = 0x7f000000U;
2398   constexpr uint32_t infinity_high2 = 0x00f00000U;
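  // Note that 0x7f000000 | 0x00f00000 == 0x7ff00000, the high word of the +Inf bit pattern.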
2399 
2400   static_assert((infinity_high | infinity_high2) ==
2401                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2402                 "The constants do not add up to the high 32 bits of double "
2403                 "precision positive infinity.");
2404   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2405   __ Eor(out, out, infinity_high);
2406   __ Eor(out, out, infinity_high2);
2407   // We don't care about the sign bit, so shift left.
2408   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2409   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2410 }
2411 
2412 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2413   if (features_.HasARMv8AInstructions()) {
2414     CreateFPToFPLocations(allocator_, invoke);
2415   }
2416 }
2417 
2418 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2419   ArmVIXLAssembler* assembler = GetAssembler();
2420   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2421   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2422 }
2423 
2424 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2425   if (features_.HasARMv8AInstructions()) {
2426     CreateFPToFPLocations(allocator_, invoke);
2427   }
2428 }
2429 
2430 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2431   ArmVIXLAssembler* assembler = GetAssembler();
2432   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2433   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2434 }
2435 
2436 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2437   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2438   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2439       invoke,
2440       codegen_,
2441       LocationFrom(r0),
2442       LocationFrom(calling_convention.GetRegisterAt(0)));
2443 }
2444 
2445 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2446   IntrinsicVisitor::IntegerValueOfInfo info =
2447       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2448   LocationSummary* locations = invoke->GetLocations();
2449   ArmVIXLAssembler* const assembler = GetAssembler();
2450 
2451   vixl32::Register out = RegisterFrom(locations->Out());
2452   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2453   vixl32::Register temp = temps.Acquire();
2454   auto allocate_instance = [&]() {
2455     DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2456     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2457     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2458     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2459   };
2460   if (invoke->InputAt(0)->IsConstant()) {
2461     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2462     if (static_cast<uint32_t>(value - info.low) < info.length) {
2463       // Just embed the j.l.Integer in the code.
2464       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2465       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2466     } else {
2467       DCHECK(locations->CanCall());
2468       // Allocate and initialize a new j.l.Integer.
2469       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2470       // JIT object table.
2471       allocate_instance();
2472       __ Mov(temp, value);
2473       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
2474       // Class pointer and `value` final field stores require a barrier before publication.
2475       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2476     }
2477   } else {
2478     DCHECK(locations->CanCall());
2479     vixl32::Register in = RegisterFrom(locations->InAt(0));
2480     // Check bounds of our cache.
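    // A single unsigned comparison of `in - info.low` against `info.length` implements the
    // range check `info.low <= in < info.low + info.length`.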
2481     __ Add(out, in, -info.low);
2482     __ Cmp(out, info.length);
2483     vixl32::Label allocate, done;
2484     __ B(hs, &allocate, /* is_far_target= */ false);
2485     // If the value is within the bounds, load the j.l.Integer directly from the array.
2486     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2487     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2488     assembler->MaybeUnpoisonHeapReference(out);
2489     __ B(&done);
2490     __ Bind(&allocate);
2491     // Otherwise allocate and initialize a new j.l.Integer.
2492     allocate_instance();
2493     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
2494     // Class pointer and `value` final field stores require a barrier before publication.
2495     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2496     __ Bind(&done);
2497   }
2498 }
2499 
2500 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2501   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2502 }
2503 
2504 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2505   ArmVIXLAssembler* assembler = GetAssembler();
2506   LocationSummary* locations = invoke->GetLocations();
2507 
2508   Location obj = locations->InAt(0);
2509   Location out = locations->Out();
2510 
2511   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2512   codegen_->AddSlowPath(slow_path);
2513 
2514   if (kEmitCompilerReadBarrier) {
2515     // Check self->GetWeakRefAccessEnabled().
2516     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2517     vixl32::Register temp = temps.Acquire();
2518     __ Ldr(temp,
2519            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2520     __ Cmp(temp, 0);
2521     __ B(eq, slow_path->GetEntryLabel());
2522   }
2523 
2524   {
2525     // Load the java.lang.ref.Reference class.
2526     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2527     vixl32::Register temp = temps.Acquire();
2528     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2529 
2530     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2531     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2532     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2533     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2534               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2535     __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2536     __ Cmp(temp, 0);
2537     __ B(ne, slow_path->GetEntryLabel());
2538   }
2539 
2540   // Load the value from the field.
2541   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2542   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2543     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2544                                                     out,
2545                                                     RegisterFrom(obj),
2546                                                     referent_offset,
2547                                                     /*maybe_temp=*/ Location::NoLocation(),
2548                                                     /*needs_null_check=*/ true);
2549     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2550   } else {
2551     {
2552       vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2553       __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2554       codegen_->MaybeRecordImplicitNullCheck(invoke);
2555     }
2556     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2557     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2558   }
2559   __ Bind(slow_path->GetExitLabel());
2560 }
2561 
2562 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2563   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
2564 }
2565 
2566 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2567   LocationSummary* locations = invoke->GetLocations();
2568   ArmVIXLAssembler* assembler = GetAssembler();
2569   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2570 
2571   vixl32::Register obj = RegisterFrom(locations->InAt(0));
2572   vixl32::Register other = RegisterFrom(locations->InAt(1));
2573   vixl32::Register out = RegisterFrom(locations->Out());
2574   vixl32::Register tmp = temps.Acquire();
2575 
2576   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2577   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2578 
2579   {
2580     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2581     // Loading the scratch register always uses a 32-bit encoding.
2582     vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2583                                  vixl32::k32BitT32InstructionSizeInBytes);
2584     __ ldr(tmp, MemOperand(obj, referent_offset));
2585     codegen_->MaybeRecordImplicitNullCheck(invoke);
2586   }
2587   assembler->MaybeUnpoisonHeapReference(tmp);
2588   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2589 
2590   if (kEmitCompilerReadBarrier) {
2591     DCHECK(kUseBakerReadBarrier);
2592 
2593     vixl32::Label calculate_result;
2594     __ Subs(out, tmp, other);
2595     __ B(eq, &calculate_result);  // `out` is 0 if taken.
2596 
2597     // Check if the loaded reference is null.
2598     __ Cmp(tmp, 0);
2599     __ B(eq, &calculate_result);  // `out` is not 0 if taken.
2600 
2601     // For correct memory visibility we need a barrier before loading the lock word, but the
2602     // barrier already emitted for the volatile load above is sufficient.
2603 
2604     // Load the lockword and check if it is a forwarding address.
2605     static_assert(LockWord::kStateShift == 30u);
2606     static_assert(LockWord::kStateForwardingAddress == 3u);
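    // The state is held in the two most significant bits of the lock word; both bits set
    // (an unsigned value of at least 0xc0000000) means a forwarding address.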
2607     __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2608     __ Cmp(tmp, Operand(0xc0000000));
2609     __ B(lo, &calculate_result);   // `out` is not 0 if taken.
2610 
2611     // Extract the forwarding address and subtract from `other`.
2612     __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2613 
2614     __ Bind(&calculate_result);
2615   } else {
2616     DCHECK(!kEmitCompilerReadBarrier);
2617     __ Sub(out, tmp, other);
2618   }
2619 
2620   // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
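  // Clz yields 32 only for a zero input; shifting right by log2(32) == 5 maps 32 to 1 and
  // every smaller value to 0.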
2621   __ Clz(out, out);
2622   __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
2623 }
2624 
2625 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2626   LocationSummary* locations =
2627       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2628   locations->SetOut(Location::RequiresRegister());
2629 }
2630 
2631 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2632   ArmVIXLAssembler* assembler = GetAssembler();
2633   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2634   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2635   __ Ldr(out, MemOperand(tr, offset));
2636   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2637   vixl32::Register temp = temps.Acquire();
2638   vixl32::Label done;
2639   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2640   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
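  // The interrupted flag was observed set: clear it, with a barrier on each side of the store
  // so the clear is ordered with the surrounding memory accesses.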
2641   __ Dmb(vixl32::ISH);
2642   __ Mov(temp, 0);
2643   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2644   __ Dmb(vixl32::ISH);
2645   if (done.IsReferenced()) {
2646     __ Bind(&done);
2647   }
2648 }
2649 
2650 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2651   LocationSummary* locations =
2652       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2653   locations->SetInAt(0, Location::Any());
2654 }
2655 
2656 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2657 
2658 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2659   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2660 }
2661 
2662 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2663   ArmVIXLAssembler* assembler = GetAssembler();
2664   LocationSummary* locations = invoke->GetLocations();
2665   vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2666   vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2667   vixl32::Register out = RegisterFrom(locations->Out());
2668 
2669   // Check if divisor is zero, bail to managed implementation to handle.
2670   SlowPathCodeARMVIXL* slow_path =
2671       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2672   codegen_->AddSlowPath(slow_path);
2673   __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2674 
2675   __ Udiv(out, dividend, divisor);
2676 
2677   __ Bind(slow_path->GetExitLabel());
2678 }
2679 
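// Cores without atomic LDRD/STRD (see HasAtomicLdrdAndStrd()) need to implement atomic
// 64-bit accesses with LDREXD/STREXD loops instead.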
2680 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2681   return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2682 }
2683 
2684 static void GenerateIntrinsicGet(HInvoke* invoke,
2685                                  CodeGeneratorARMVIXL* codegen,
2686                                  DataType::Type type,
2687                                  std::memory_order order,
2688                                  bool atomic,
2689                                  vixl32::Register base,
2690                                  vixl32::Register offset,
2691                                  Location out,
2692                                  Location maybe_temp,
2693                                  Location maybe_temp2,
2694                                  Location maybe_temp3) {
2695   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2696   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2697   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2698   DCHECK(atomic || order == std::memory_order_relaxed);
2699 
2700   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2701   MemOperand address(base, offset);
2702   switch (type) {
2703     case DataType::Type::kBool:
2704       __ Ldrb(RegisterFrom(out), address);
2705       break;
2706     case DataType::Type::kInt8:
2707       __ Ldrsb(RegisterFrom(out), address);
2708       break;
2709     case DataType::Type::kUint16:
2710       __ Ldrh(RegisterFrom(out), address);
2711       break;
2712     case DataType::Type::kInt16:
2713       __ Ldrsh(RegisterFrom(out), address);
2714       break;
2715     case DataType::Type::kInt32:
2716       __ Ldr(RegisterFrom(out), address);
2717       break;
2718     case DataType::Type::kInt64:
2719       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
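        // A plain LDRD would not be single-copy atomic here, so use an LDREXD/STREXD pair
        // that writes back the value just read and retry until the exclusive store succeeds;
        // a successful STREXD guarantees the two words were read as one atomic unit.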
2720         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2721         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2722         const vixl32::Register temp_reg = temps.Acquire();
2723         __ Add(temp_reg, base, offset);
2724         vixl32::Label loop;
2725         __ Bind(&loop);
2726         __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2727         __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2728         __ Cmp(strexd_tmp, 0);
2729         __ B(ne, &loop);
2730       } else {
2731         __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2732       }
2733       break;
2734     case DataType::Type::kReference:
2735       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2736         // Piggy-back on the field load path using introspection for the Baker read barrier.
2737         vixl32::Register temp = RegisterFrom(maybe_temp);
2738         __ Add(temp, base, offset);
2739         codegen->GenerateFieldLoadWithBakerReadBarrier(
2740             invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2741       } else {
2742         __ Ldr(RegisterFrom(out), address);
2743       }
2744       break;
2745     case DataType::Type::kFloat32: {
2746       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2747       const vixl32::Register temp_reg = temps.Acquire();
2748       __ Add(temp_reg, base, offset);
2749       __ Vldr(SRegisterFrom(out), MemOperand(temp_reg));
2750       break;
2751     }
2752     case DataType::Type::kFloat64: {
2753       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2754       const vixl32::Register temp_reg = temps.Acquire();
2755       __ Add(temp_reg, base, offset);
2756       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2757         vixl32::Register lo = RegisterFrom(maybe_temp);
2758         vixl32::Register hi = RegisterFrom(maybe_temp2);
2759         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2760         vixl32::Label loop;
2761         __ Bind(&loop);
2762         __ Ldrexd(lo, hi, MemOperand(temp_reg));
2763         __ Strexd(strexd_tmp, lo, hi, MemOperand(temp_reg));
2764         __ Cmp(strexd_tmp, 0);
2765         __ B(ne, &loop);
2766         __ Vmov(DRegisterFrom(out), lo, hi);
2767       } else {
2768         __ Vldr(DRegisterFrom(out), MemOperand(temp_reg));
2769       }
2770       break;
2771     }
2772     default:
2773       LOG(FATAL) << "Unexpected type " << type;
2774       UNREACHABLE();
2775   }
2776   if (acquire_barrier) {
2777     codegen->GenerateMemoryBarrier(
2778         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2779   }
2780   if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) {
2781     Location base_loc = LocationFrom(base);
2782     Location index_loc = LocationFrom(offset);
2783     codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2784   }
2785 }
2786 
2787 static void CreateUnsafeGetLocations(HInvoke* invoke,
2788                                      CodeGeneratorARMVIXL* codegen,
2789                                      DataType::Type type,
2790                                      bool atomic) {
2791   bool can_call = kEmitCompilerReadBarrier &&
2792       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2793        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2794   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2795   LocationSummary* locations =
2796       new (allocator) LocationSummary(invoke,
2797                                       can_call
2798                                           ? LocationSummary::kCallOnSlowPath
2799                                           : LocationSummary::kNoCall,
2800                                       kIntrinsified);
2801   if (can_call && kUseBakerReadBarrier) {
2802     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2803   }
2804   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2805   locations->SetInAt(1, Location::RequiresRegister());
2806   locations->SetInAt(2, Location::RequiresRegister());
2807   locations->SetOut(Location::RequiresRegister(),
2808                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2809   if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2810       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2811     // We need a temporary register for the read barrier marking slow
2812     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2813     // or the STREXD result for LDREXD/STREXD sequence when LDRD is non-atomic.
2814     locations->AddTemp(Location::RequiresRegister());
2815   }
2816 }
2817 
2818 static void GenUnsafeGet(HInvoke* invoke,
2819                          CodeGeneratorARMVIXL* codegen,
2820                          DataType::Type type,
2821                          std::memory_order order,
2822                          bool atomic) {
2823   LocationSummary* locations = invoke->GetLocations();
2824   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
2825   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
2826   Location out = locations->Out();
2827   Location maybe_temp = Location::NoLocation();
2828   if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2829       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2830     maybe_temp = locations->GetTemp(0);
2831   }
2832   GenerateIntrinsicGet(invoke,
2833                        codegen,
2834                        type,
2835                        order,
2836                        atomic,
2837                        base,
2838                        offset,
2839                        out,
2840                        maybe_temp,
2841                        /*maybe_temp2=*/ Location::NoLocation(),
2842                        /*maybe_temp3=*/ Location::NoLocation());
2843 }
2844 
2845 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2846   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2847 }
2848 
2849 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2850   GenUnsafeGet(
2851       invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2852 }
2853 
2854 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2855   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2856 }
2857 
2858 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2859   GenUnsafeGet(
2860       invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2861 }
2862 
2863 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2864   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2865 }
2866 
2867 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2868   GenUnsafeGet(
2869       invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2870 }
2871 
2872 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2873   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2874 }
2875 
2876 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2877   GenUnsafeGet(
2878       invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2879 }
2880 
2881 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2882   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2883 }
2884 
2885 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2886   GenUnsafeGet(
2887       invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2888 }
2889 
2890 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2891   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2892 }
2893 
2894 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2895   GenUnsafeGet(
2896       invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2897 }
2898 
2899 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
2900                                  DataType::Type type,
2901                                  std::memory_order order,
2902                                  bool atomic,
2903                                  vixl32::Register base,
2904                                  vixl32::Register offset,
2905                                  Location value,
2906                                  Location maybe_temp,
2907                                  Location maybe_temp2,
2908                                  Location maybe_temp3) {
2909   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2910   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
2911   DCHECK(release_barrier || order == std::memory_order_relaxed);
2912   DCHECK(atomic || order == std::memory_order_relaxed);
2913 
2914   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2915   if (release_barrier) {
2916     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
2917   }
2918   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2919   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2920     vixl32::Register temp = temps.Acquire();
2921     __ Mov(temp, RegisterFrom(value));
2922     assembler->PoisonHeapReference(temp);
2923     value = LocationFrom(temp);
2924   }
2925   MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
2926   if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
2927     const vixl32::Register temp_reg = temps.Acquire();
2928     __ Add(temp_reg, base, offset);
2929     address = MemOperand(temp_reg);
2930   }
2931   switch (type) {
2932     case DataType::Type::kBool:
2933     case DataType::Type::kInt8:
2934       __ Strb(RegisterFrom(value), address);
2935       break;
2936     case DataType::Type::kUint16:
2937     case DataType::Type::kInt16:
2938       __ Strh(RegisterFrom(value), address);
2939       break;
2940     case DataType::Type::kReference:
2941     case DataType::Type::kInt32:
2942       __ Str(RegisterFrom(value), address);
2943       break;
2944     case DataType::Type::kInt64:
2945       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
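        // STREXD succeeds only after a matching LDREXD has claimed exclusive access, so load
        // (and ignore) the old value before each store attempt.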
2946         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2947         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2948         vixl32::Label loop;
2949         __ Bind(&loop);
2950         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2951         __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
2952         __ Cmp(lo_tmp, 0);
2953         __ B(ne, &loop);
2954       } else {
2955         __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
2956       }
2957       break;
2958     case DataType::Type::kFloat32:
2959       __ Vstr(SRegisterFrom(value), address);
2960       break;
2961     case DataType::Type::kFloat64:
2962       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2963         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
2964         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
2965         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2966         vixl32::Label loop;
2967         __ Bind(&loop);
2968         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
2969         __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
2970         __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
2971         __ Cmp(strexd_tmp, 0);
2972         __ B(ne, &loop);
2973       } else {
2974         __ Vstr(DRegisterFrom(value), address);
2975       }
2976       break;
2977     default:
2978       LOG(FATAL) << "Unexpected type " << type;
2979       UNREACHABLE();
2980   }
2981   if (seq_cst_barrier) {
2982     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
2983   }
2984 }
2985 
2986 static void CreateUnsafePutLocations(HInvoke* invoke,
2987                                      CodeGeneratorARMVIXL* codegen,
2988                                      DataType::Type type,
2989                                      bool atomic) {
2990   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2991   LocationSummary* locations =
2992       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2993   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2994   locations->SetInAt(1, Location::RequiresRegister());
2995   locations->SetInAt(2, Location::RequiresRegister());
2996   locations->SetInAt(3, Location::RequiresRegister());
2997 
2998   if (type == DataType::Type::kInt64) {
2999     // Potentially need temps for ldrexd-strexd loop.
3000     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3001       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
3002       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
3003     }
3004   } else if (type == DataType::Type::kReference) {
3005     // Temp for card-marking.
3006     locations->AddTemp(Location::RequiresRegister());  // Temp.
3007   }
3008 }
3009 
3010 static void GenUnsafePut(HInvoke* invoke,
3011                          DataType::Type type,
3012                          std::memory_order order,
3013                          bool atomic,
3014                          CodeGeneratorARMVIXL* codegen) {
3015   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3016 
3017   LocationSummary* locations = invoke->GetLocations();
3018   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
3019   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
3020   Location value = locations->InAt(3);
3021   Location maybe_temp = Location::NoLocation();
3022   Location maybe_temp2 = Location::NoLocation();
3023   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3024     maybe_temp = locations->GetTemp(0);
3025     maybe_temp2 = locations->GetTemp(1);
3026   }
3027 
3028   GenerateIntrinsicSet(codegen,
3029                        type,
3030                        order,
3031                        atomic,
3032                        base,
3033                        offset,
3034                        value,
3035                        maybe_temp,
3036                        maybe_temp2,
3037                        /*maybe_temp3=*/ Location::NoLocation());
3038 
3039   if (type == DataType::Type::kReference) {
3040     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3041     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3042     vixl32::Register card = temps.Acquire();
3043     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3044     codegen->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3045   }
3046 }
3047 
3048 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3049   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3050 }
3051 
3052 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3053   GenUnsafePut(invoke,
3054                DataType::Type::kInt32,
3055                std::memory_order_relaxed,
3056                /*atomic=*/ false,
3057                codegen_);
3058 }
3059 
3060 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3061   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3062 }
3063 
3064 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3065   GenUnsafePut(invoke,
3066                DataType::Type::kInt32,
3067                std::memory_order_release,
3068                /*atomic=*/ true,
3069                codegen_);
3070 }
3071 
3072 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3073   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3074 }
3075 
3076 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3077   GenUnsafePut(invoke,
3078                DataType::Type::kInt32,
3079                std::memory_order_seq_cst,
3080                /*atomic=*/ true,
3081                codegen_);
3082 }
3083 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3084   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3085 }
3086 
3087 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3088   GenUnsafePut(invoke,
3089                DataType::Type::kReference,
3090                std::memory_order_relaxed,
3091                /*atomic=*/ false,
3092                codegen_);
3093 }
3094 
3095 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3096   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3097 }
3098 
3099 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3100   GenUnsafePut(invoke,
3101                DataType::Type::kReference,
3102                std::memory_order_release,
3103                /*atomic=*/ true,
3104                codegen_);
3105 }
3106 
3107 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3108   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3109 }
3110 
3111 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3112   GenUnsafePut(invoke,
3113                DataType::Type::kReference,
3114                std::memory_order_seq_cst,
3115                /*atomic=*/ true,
3116                codegen_);
3117 }
3118 
3119 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3120   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3121 }
3122 
3123 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3124   GenUnsafePut(invoke,
3125                DataType::Type::kInt64,
3126                std::memory_order_relaxed,
3127                /*atomic=*/ false,
3128                codegen_);
3129 }
3130 
3131 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3132   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3133 }
3134 
3135 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3136   GenUnsafePut(invoke,
3137                DataType::Type::kInt64,
3138                std::memory_order_release,
3139                /*atomic=*/ true,
3140                codegen_);
3141 }
3142 
3143 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3144   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3145 }
3146 
3147 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3148   GenUnsafePut(invoke,
3149                DataType::Type::kInt64,
3150                std::memory_order_seq_cst,
3151                /*atomic=*/ true,
3152                codegen_);
3153 }
3154 
3155 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3156                               DataType::Type type,
3157                               vixl32::Register ptr,
3158                               Location old_value) {
3159   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3160   switch (type) {
3161     case DataType::Type::kBool:
3162     case DataType::Type::kInt8:
3163       __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3164       break;
3165     case DataType::Type::kUint16:
3166     case DataType::Type::kInt16:
3167       __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3168       break;
3169     case DataType::Type::kInt32:
3170     case DataType::Type::kReference:
3171       __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3172       break;
3173     case DataType::Type::kInt64:
3174       __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3175       break;
3176     default:
3177       LOG(FATAL) << "Unexpected type: " << type;
3178       UNREACHABLE();
3179   }
3180   switch (type) {
3181     case DataType::Type::kInt8:
3182       __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3183       break;
3184     case DataType::Type::kInt16:
3185       __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3186       break;
3187     case DataType::Type::kReference:
3188       assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3189       break;
3190     default:
3191       break;
3192   }
3193 }
3194 
3195 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3196                                DataType::Type type,
3197                                vixl32::Register ptr,
3198                                vixl32::Register store_result,
3199                                Location new_value) {
3200   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3201   if (type == DataType::Type::kReference) {
3202     assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3203   }
3204   switch (type) {
3205     case DataType::Type::kBool:
3206     case DataType::Type::kInt8:
3207       __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3208       break;
3209     case DataType::Type::kUint16:
3210     case DataType::Type::kInt16:
3211       __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3212       break;
3213     case DataType::Type::kInt32:
3214     case DataType::Type::kReference:
3215       __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3216       break;
3217     case DataType::Type::kInt64:
3218       __ Strexd(
3219           store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3220       break;
3221     default:
3222       LOG(FATAL) << "Unexpected type: " << type;
3223       UNREACHABLE();
3224   }
3225   if (type == DataType::Type::kReference) {
3226     assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3227   }
3228 }
3229 
3230 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3231                                   DataType::Type type,
3232                                   bool strong,
3233                                   vixl32::Label* cmp_failure,
3234                                   bool cmp_failure_is_far_target,
3235                                   vixl32::Register ptr,
3236                                   Location expected,
3237                                   Location new_value,
3238                                   Location old_value,
3239                                   vixl32::Register store_result,
3240                                   vixl32::Register success) {
3241   // For kReference, `expected` shall be a register pair when called from the read barrier
3242   // slow path: it holds both the original `expected` and the unmarked old value that the
3243   // main path's CAS attempt loaded (and which matched `expected` only after marking).
3244   // Otherwise the type of `expected` shall match the type of `new_value` and `old_value`.
3245   if (type == DataType::Type::kInt64) {
3246     DCHECK(expected.IsRegisterPair());
3247     DCHECK(new_value.IsRegisterPair());
3248     DCHECK(old_value.IsRegisterPair());
3249   } else {
3250     DCHECK(expected.IsRegister() ||
3251            (type == DataType::Type::kReference && expected.IsRegisterPair()));
3252     DCHECK(new_value.IsRegister());
3253     DCHECK(old_value.IsRegister());
3254   }
3255 
3256   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3257 
3258   // do {
3259   //   old_value = [ptr];  // Load exclusive.
3260   //   if (old_value != expected) goto cmp_failure;
3261   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
3262   // } while (strong && store_result);
3263   //
3264   // If `success` is a valid register, there are additional instructions in the above code
3265   // to report success with value 1 and failure with value 0 in that register.
3266 
3267   vixl32::Label loop_head;
3268   if (strong) {
3269     __ Bind(&loop_head);
3270   }
3271   EmitLoadExclusive(codegen, type, ptr, old_value);
3272   // We do not need to initialize the failure code for comparison failure if the
3273   // branch goes to the read barrier slow path that clobbers `success` anyway.
3274   bool init_failure_for_cmp =
3275       success.IsValid() &&
3276       !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
3277   // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3278   // is essentially free, so prepare the failure value here if we can.
3279   bool init_failure_for_cmp_early =
3280       init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3281   if (init_failure_for_cmp_early) {
3282     __ Mov(success, 0);  // Indicate failure if the comparison fails.
3283   }
3284   if (type == DataType::Type::kInt64) {
3285     __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3286     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3287     __ it(eq);
3288     __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3289   } else if (expected.IsRegisterPair()) {
3290     DCHECK_EQ(type, DataType::Type::kReference);
3291     // Check if the loaded value matches any of the two registers in `expected`.
3292     __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3293     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3294     __ it(ne);
3295     __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3296   } else {
3297     __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3298   }
3299   if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3300     __ Mov(LeaveFlags, success, 0);  // Indicate failure if the comparison fails.
3301   }
3302   __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3303   EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3304   if (strong) {
3305     // Instruction scheduling: Loading a constant between STREX* and using its result
3306     // is essentially free, so prepare the success value here if needed.
3307     if (success.IsValid()) {
3308       DCHECK(!success.Is(store_result));
3309       __ Mov(success, 1);  // Indicate success if the store succeeds.
3310     }
3311     __ Cmp(store_result, 0);
3312     __ B(ne, &loop_head, /*is_far_target=*/ false);
3313   } else {
3314     // Weak CAS (the VarHandle.weakCompareAndSet variants) must always report success or failure.
3315     DCHECK(success.IsValid());
3316     // Flip the `store_result` to indicate success by 1 and failure by 0.
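    // (STREX* writes 0 to `store_result` on success and 1 on failure.)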
3317     __ Eor(success, store_result, 1);
3318   }
3319 }
3320 
3321 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3322  public:
3323   explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3324                                          bool strong,
3325                                          vixl32::Register base,
3326                                          vixl32::Register offset,
3327                                          vixl32::Register expected,
3328                                          vixl32::Register new_value,
3329                                          vixl32::Register old_value,
3330                                          vixl32::Register old_value_temp,
3331                                          vixl32::Register store_result,
3332                                          vixl32::Register success,
3333                                          CodeGeneratorARMVIXL* arm_codegen)
3334       : SlowPathCodeARMVIXL(invoke),
3335         strong_(strong),
3336         base_(base),
3337         offset_(offset),
3338         expected_(expected),
3339         new_value_(new_value),
3340         old_value_(old_value),
3341         old_value_temp_(old_value_temp),
3342         store_result_(store_result),
3343         success_(success),
3344         mark_old_value_slow_path_(nullptr),
3345         update_old_value_slow_path_(nullptr) {
3346     if (!kUseBakerReadBarrier) {
3347       // We need to add the slow path now, it is too late when emitting slow path code.
3348       mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3349           invoke,
3350           Location::RegisterLocation(old_value_temp.GetCode()),
3351           Location::RegisterLocation(old_value.GetCode()),
3352           Location::RegisterLocation(base.GetCode()),
3353           /*offset=*/ 0u,
3354           /*index=*/ Location::RegisterLocation(offset.GetCode()));
3355       if (!success.IsValid()) {
3356         update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3357             invoke,
3358             Location::RegisterLocation(old_value.GetCode()),
3359             Location::RegisterLocation(old_value_temp.GetCode()),
3360             Location::RegisterLocation(base.GetCode()),
3361             /*offset=*/ 0u,
3362             /*index=*/ Location::RegisterLocation(offset.GetCode()));
3363       }
3364     }
3365   }
3366 
3367   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3368 
3369   void EmitNativeCode(CodeGenerator* codegen) override {
3370     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3371     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3372     __ Bind(GetEntryLabel());
3373 
3374     // Mark the `old_value_` from the main path and compare with `expected_`.
3375     if (kUseBakerReadBarrier) {
3376       DCHECK(mark_old_value_slow_path_ == nullptr);
3377       arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3378     } else {
3379       DCHECK(mark_old_value_slow_path_ != nullptr);
3380       __ B(mark_old_value_slow_path_->GetEntryLabel());
3381       __ Bind(mark_old_value_slow_path_->GetExitLabel());
3382     }
3383     __ Cmp(old_value_temp_, expected_);
3384     if (success_.IsValid()) {
3385       __ Mov(LeaveFlags, success_, 0);  // Indicate failure if we take the branch out.
3386     } else {
3387       // In case of failure, update the `old_value_` with the marked reference.
3388       ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3389       __ it(ne);
3390       __ mov(ne, old_value_, old_value_temp_);
3391     }
3392     __ B(ne, GetExitLabel());
3393 
3394     // The old value we have read did not match `expected` (which is always a to-space
3395     // reference) but after the read barrier the marked to-space value matched, so the
3396     // old value must be a from-space reference to the same object. Do the same CAS loop
3397     // as the main path but check for both `expected` and the unmarked old value
3398     // representing the to-space and from-space references for the same object.
3399 
3400     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3401     vixl32::Register tmp_ptr = temps.Acquire();
3402 
3403     // Recalculate the `tmp_ptr` clobbered above.
3404     __ Add(tmp_ptr, base_, offset_);
3405 
3406     vixl32::Label mark_old_value;
3407     GenerateCompareAndSet(arm_codegen,
3408                           DataType::Type::kReference,
3409                           strong_,
3410                           /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3411                           /*cmp_failure_is_far_target=*/ success_.IsValid(),
3412                           tmp_ptr,
3413                           /*expected=*/ LocationFrom(expected_, old_value_),
3414                           /*new_value=*/ LocationFrom(new_value_),
3415                           /*old_value=*/ LocationFrom(old_value_temp_),
3416                           store_result_,
3417                           success_);
3418     if (!success_.IsValid()) {
3419       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3420       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3421       __ Mov(old_value_, expected_);
3422     }
3423 
3424     __ B(GetExitLabel());
3425 
3426     if (!success_.IsValid()) {
3427       __ Bind(&mark_old_value);
3428       if (kUseBakerReadBarrier) {
3429         DCHECK(update_old_value_slow_path_ == nullptr);
3430         arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3431       } else {
3432         // Note: We could redirect the `failure` above directly to the entry label and bind
3433         // the exit label in the main path, but the main path would need to access the
3434         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3435         DCHECK(update_old_value_slow_path_ != nullptr);
3436         __ B(update_old_value_slow_path_->GetEntryLabel());
3437         __ Bind(update_old_value_slow_path_->GetExitLabel());
3438       }
3439       __ B(GetExitLabel());
3440     }
3441   }
3442 
3443  private:
3444   bool strong_;
3445   vixl32::Register base_;
3446   vixl32::Register offset_;
3447   vixl32::Register expected_;
3448   vixl32::Register new_value_;
3449   vixl32::Register old_value_;
3450   vixl32::Register old_value_temp_;
3451   vixl32::Register store_result_;
3452   vixl32::Register success_;
3453   SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3454   SlowPathCodeARMVIXL* update_old_value_slow_path_;
3455 };
3456 
static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3458   bool can_call = kEmitCompilerReadBarrier &&
3459       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
3460   LocationSummary* locations =
3461       new (allocator) LocationSummary(invoke,
3462                                       can_call
3463                                           ? LocationSummary::kCallOnSlowPath
3464                                           : LocationSummary::kNoCall,
3465                                       kIntrinsified);
3466   if (can_call && kUseBakerReadBarrier) {
3467     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3468   }
3469   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3470   locations->SetInAt(1, Location::RequiresRegister());
3471   locations->SetInAt(2, Location::RequiresRegister());
3472   locations->SetInAt(3, Location::RequiresRegister());
3473   locations->SetInAt(4, Location::RequiresRegister());
3474 
3475   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3476 
3477   // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3478   // this is also used for card-marking, and possibly for read barrier.
3479   locations->AddTemp(Location::RequiresRegister());
3480 }
3481 
static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3483   DCHECK_NE(type, DataType::Type::kInt64);
3484 
3485   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3486   LocationSummary* locations = invoke->GetLocations();
3487 
3488   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
3489   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
3490   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
3491   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
3492   vixl32::Register new_value = InputRegisterAt(invoke, 4);            // New value.
3493 
3494   vixl32::Register tmp = RegisterFrom(locations->GetTemp(0));         // Temporary.
3495 
3496   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3497   vixl32::Register tmp_ptr = temps.Acquire();
3498 
3499   if (type == DataType::Type::kReference) {
3500     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3501     // object and scan the receiver at the next GC for nothing.
3502     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3503     codegen->MarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3504   }
3505 
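  // By default, a comparison failure simply falls out of the CAS loop at `exit_loop_label`.
  // When a read barrier is emitted for a reference CAS, `cmp_failure` is redirected below to
  // the read barrier slow path, which re-checks the old value against the marked reference.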
3506   vixl32::Label exit_loop_label;
3507   vixl32::Label* exit_loop = &exit_loop_label;
3508   vixl32::Label* cmp_failure = &exit_loop_label;
3509 
3510   if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
3511     // If marking, check if the stored reference is a from-space reference to the same
3512     // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3513     ReadBarrierCasSlowPathARMVIXL* slow_path =
3514         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3515             invoke,
3516             /*strong=*/ true,
3517             base,
3518             offset,
3519             expected,
3520             new_value,
3521             /*old_value=*/ tmp,
3522             /*old_value_temp=*/ out,
3523             /*store_result=*/ tmp,
3524             /*success=*/ out,
3525             codegen);
3526     codegen->AddSlowPath(slow_path);
3527     exit_loop = slow_path->GetExitLabel();
3528     cmp_failure = slow_path->GetEntryLabel();
3529   }
3530 
3531   // Unsafe CAS operations have std::memory_order_seq_cst semantics.
3532   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3533   __ Add(tmp_ptr, base, offset);
3534   GenerateCompareAndSet(codegen,
3535                         type,
3536                         /*strong=*/ true,
3537                         cmp_failure,
3538                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3539                         tmp_ptr,
3540                         /*expected=*/ LocationFrom(expected),  // TODO: Int64
3541                         /*new_value=*/ LocationFrom(new_value),  // TODO: Int64
3542                         /*old_value=*/ LocationFrom(tmp),  // TODO: Int64
3543                         /*store_result=*/ tmp,
3544                         /*success=*/ out);
3545   __ Bind(exit_loop);
3546   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3547 
3548   if (type == DataType::Type::kReference) {
3549     codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3550   }
3551 }
3552 
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateUnsafeCASLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the UnsafeCASObject intrinsic
  // is the Baker-style read barrier. b/173104084
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateUnsafeCASLocations(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
3571 
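// Operations implemented by GenerateGetAndUpdate() below. kAddWithByteSwap is the variant
// used when the value in memory has non-native byte order (byte array views): the loaded
// value is byte-reversed before the addition and the sum is byte-reversed again before the
// exclusive store.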
3572 enum class GetAndUpdateOp {
3573   kSet,
3574   kAdd,
3575   kAddWithByteSwap,
3576   kAnd,
3577   kOr,
3578   kXor
3579 };
3580 
static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
                                 GetAndUpdateOp get_and_update_op,
                                 DataType::Type load_store_type,
                                 vixl32::Register ptr,
                                 Location arg,
                                 Location old_value,
                                 vixl32::Register store_result,
                                 Location maybe_temp,
                                 Location maybe_vreg_temp) {
3590   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3591 
3592   Location loaded_value;
3593   Location new_value;
3594   switch (get_and_update_op) {
3595     case GetAndUpdateOp::kSet:
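      // GetAndSet: the argument is stored as-is; load the old value directly into the output.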
3596       loaded_value = old_value;
3597       new_value = arg;
3598       break;
3599     case GetAndUpdateOp::kAddWithByteSwap:
3600       if (old_value.IsRegisterPair()) {
3601         // To avoid register overlap when reversing bytes, load into temps.
3602         DCHECK(maybe_temp.IsRegisterPair());
3603         loaded_value = maybe_temp;
3604         new_value = loaded_value;  // Use the same temporaries for the new value.
3605         break;
3606       }
3607       FALLTHROUGH_INTENDED;
3608     case GetAndUpdateOp::kAdd:
3609       if (old_value.IsFpuRegisterPair()) {
3610         DCHECK(maybe_temp.IsRegisterPair());
3611         loaded_value = maybe_temp;
3612         new_value = loaded_value;  // Use the same temporaries for the new value.
3613         break;
3614       }
3615       if (old_value.IsFpuRegister()) {
3616         DCHECK(maybe_temp.IsRegister());
3617         loaded_value = maybe_temp;
3618         new_value = loaded_value;  // Use the same temporary for the new value.
3619         break;
3620       }
3621       FALLTHROUGH_INTENDED;
3622     case GetAndUpdateOp::kAnd:
3623     case GetAndUpdateOp::kOr:
3624     case GetAndUpdateOp::kXor:
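      // The loaded old value is the result to return, so compute the updated value in the
      // provided temporary.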
3625       loaded_value = old_value;
3626       new_value = maybe_temp;
3627       break;
3628   }
3629 
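  // LDREX/STREX retry loop: load the value exclusively, compute the update and attempt the
  // exclusive store; retry if another thread wrote the location in the meantime.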
3630   vixl32::Label loop_label;
3631   __ Bind(&loop_label);
3632   EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3633   switch (get_and_update_op) {
3634     case GetAndUpdateOp::kSet:
3635       break;
3636     case GetAndUpdateOp::kAddWithByteSwap:
3637       if (arg.IsFpuRegisterPair()) {
3638         GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3639         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3640         __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3641         __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum);  // Swap low/high.
3642       } else if (arg.IsFpuRegister()) {
3643         GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3644         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3645         __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3646         __ Vmov(RegisterFrom(new_value), sum);
3647       } else if (load_store_type == DataType::Type::kInt64) {
3648         GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3649         // Swap low/high registers for the addition results.
3650         __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3651         __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3652       } else {
3653         GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3654         __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3655       }
3656       if (load_store_type == DataType::Type::kInt64) {
3657         // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3658         GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3659       } else {
3660         GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3661       }
3662       break;
3663     case GetAndUpdateOp::kAdd:
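      // Integer adds operate on the loaded core registers directly. FP adds move the loaded
      // bits to FP registers, add there, and move the sum back for the exclusive store.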
3664       if (arg.IsFpuRegisterPair()) {
3665         vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3666         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3667         __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3668         __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
3669         __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
3670       } else if (arg.IsFpuRegister()) {
3671         vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
3672         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3673         __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
3674         __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
3675         __ Vmov(RegisterFrom(new_value), sum);
3676       } else if (load_store_type == DataType::Type::kInt64) {
3677         __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3678         __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3679       } else {
3680         __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3681       }
3682       break;
3683     case GetAndUpdateOp::kAnd:
3684       if (load_store_type == DataType::Type::kInt64) {
3685         __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3686         __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3687       } else {
3688         __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3689       }
3690       break;
3691     case GetAndUpdateOp::kOr:
3692       if (load_store_type == DataType::Type::kInt64) {
3693         __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3694         __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3695       } else {
3696         __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3697       }
3698       break;
3699     case GetAndUpdateOp::kXor:
3700       if (load_store_type == DataType::Type::kInt64) {
3701         __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3702         __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3703       } else {
3704         __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3705       }
3706       break;
3707   }
3708   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
3709   __ Cmp(store_result, 0);
3710   __ B(ne, &loop_label);
3711 }
3712 
3713 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
3714  public:
  VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
3716       : IntrinsicSlowPathARMVIXL(invoke),
3717         order_(order),
3718         atomic_(false),
3719         return_success_(false),
3720         strong_(false),
3721         get_and_update_op_(GetAndUpdateOp::kAdd) {
3722   }
3723 
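  // Label branched to by the array checks when the access may be through a byte array view
  // rather than a plain array; EmitByteArrayViewCode() handles that case.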
  vixl32::Label* GetByteArrayViewCheckLabel() {
3725     return &byte_array_view_check_label_;
3726   }
3727 
  vixl32::Label* GetNativeByteOrderLabel() {
3729     return &native_byte_order_label_;
3730   }
3731 
  void SetAtomic(bool atomic) {
3733     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
3734            GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
3735     atomic_ = atomic;
3736   }
3737 
  void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3739     if (return_success) {
3740       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3741     } else {
3742       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3743     }
3744     return_success_ = return_success;
3745     strong_ = strong;
3746   }
3747 
  void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3749     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3750     get_and_update_op_ = get_and_update_op;
3751   }
3752 
  void EmitNativeCode(CodeGenerator* codegen_in) override {
3754     if (GetByteArrayViewCheckLabel()->IsReferenced()) {
3755       EmitByteArrayViewCode(codegen_in);
3756     }
3757     IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
3758   }
3759 
3760  private:
  HInvoke* GetInvoke() const {
3762     return GetInstruction()->AsInvoke();
3763   }
3764 
  mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3766     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3767   }
3768 
3769   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3770 
3771   vixl32::Label byte_array_view_check_label_;
3772   vixl32::Label native_byte_order_label_;
3773   // Shared parameter for all VarHandle intrinsics.
3774   std::memory_order order_;
3775   // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
3776   bool atomic_;
3777   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3778   bool return_success_;
3779   bool strong_;
3780   // Extra argument for GenerateVarHandleGetAndUpdate().
3781   GetAndUpdateOp get_and_update_op_;
3782 };
3783 
3784 // Generate subtype check without read barriers.
static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
                                                    SlowPathCodeARMVIXL* slow_path,
                                                    vixl32::Register object,
                                                    vixl32::Register type,
                                                    bool object_can_be_null = true) {
3790   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3791 
3792   const MemberOffset class_offset = mirror::Object::ClassOffset();
3793   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3794 
3795   vixl32::Label success;
3796   if (object_can_be_null) {
3797     __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
3798   }
3799 
3800   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3801   vixl32::Register temp = temps.Acquire();
3802 
3803   __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
3804   assembler->MaybeUnpoisonHeapReference(temp);
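  // Walk up the superclass chain. If we find `type`, the check succeeds; if we reach a null
  // superclass first, defer to the slow path.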
3805   vixl32::Label loop;
3806   __ Bind(&loop);
3807   __ Cmp(type, temp);
3808   __ B(eq, &success, /*is_far_target=*/ false);
3809   __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
3810   assembler->MaybeUnpoisonHeapReference(temp);
3811   __ Cmp(temp, 0);
3812   __ B(eq, slow_path->GetEntryLabel());
3813   __ B(&loop);
3814   __ Bind(&success);
3815 }
3816 
3817 // Check access mode and the primitive type from VarHandle.varType.
3818 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3819 // check without read barrier, so it can have false negatives which we handle in the slow path.
static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
                                                        CodeGeneratorARMVIXL* codegen,
                                                        SlowPathCodeARMVIXL* slow_path,
                                                        DataType::Type type) {
3824   mirror::VarHandle::AccessMode access_mode =
3825       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3826   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3827 
3828   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3829   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3830 
3831   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3832   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3833   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3834 
3835   // Use the temporary register reserved for offset. It is not used yet at this point.
3836   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3837   vixl32::Register var_type_no_rb =
3838       RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
3839 
  // Check that the operation is permitted and check the primitive type of varhandle.varType.
  // No read barrier is needed when loading the varType reference, since we use it only to
  // load a constant primitive field (the primitive type). Use LDRD to load the varType and
  // the access mode bit mask together.
3843   {
3844     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3845     vixl32::Register temp2 = temps.Acquire();
3846     DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
3847     __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
3848     assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
3849     __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
3850     __ B(eq, slow_path->GetEntryLabel());
3851     __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
3852     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
3853     __ B(ne, slow_path->GetEntryLabel());
3854   }
3855 
3856   if (type == DataType::Type::kReference) {
3857     // Check reference arguments against the varType.
3858     // False negatives due to varType being an interface or array type
3859     // or due to the missing read barrier are handled by the slow path.
3860     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3861     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3862     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3863       HInstruction* arg = invoke->InputAt(arg_index);
3864       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3865       if (!arg->IsNullConstant()) {
3866         vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
3867         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
3868       }
3869     }
3870   }
3871 }
3872 
static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
                                              CodeGeneratorARMVIXL* codegen,
                                              SlowPathCodeARMVIXL* slow_path) {
3876   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3877   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3878 
3879   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3880 
3881   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3882   vixl32::Register temp = temps.Acquire();
3883 
3884   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3885   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3886   __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3887   __ Cmp(temp, 0);
3888   __ B(ne, slow_path->GetEntryLabel());
3889 }
3890 
static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
                                                 CodeGeneratorARMVIXL* codegen,
                                                 SlowPathCodeARMVIXL* slow_path) {
3894   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3895   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3896   vixl32::Register object = InputRegisterAt(invoke, 1);
3897 
3898   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3899   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3900 
3901   // Null-check the object.
3902   __ Cmp(object, 0);
3903   __ B(eq, slow_path->GetEntryLabel());
3904 
3905   // Use the first temporary register, whether it's for the declaring class or the offset.
3906   // It is not used yet at this point.
3907   vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
3908 
3909   // Check that the VarHandle references an instance field by checking that
3910   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3911   // type compatibility check with the source object's type, which will fail for null.
3912   {
3913     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3914     vixl32::Register temp2 = temps.Acquire();
3915     DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
3916     __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3917     assembler->MaybeUnpoisonHeapReference(temp);
3918     // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3919     __ Cmp(temp2, 0);
3920     __ B(ne, slow_path->GetEntryLabel());
3921   }
3922 
3923   // Check that the object has the correct type.
3924   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3925   GenerateSubTypeObjectCheckNoReadBarrier(
3926       codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
3927 }
3928 
static DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
                                                    size_t expected_coordinates_count) {
3931   DCHECK_EQ(expected_coordinates_count, GetExpectedVarHandleCoordinatesCount(invoke));
3932   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3933   DCHECK_GE(number_of_arguments, /* VarHandle object */ 1u + expected_coordinates_count);
3934   if (number_of_arguments == /* VarHandle object */ 1u + expected_coordinates_count) {
3935     return invoke->GetType();
3936   } else {
3937     return GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
3938   }
3939 }
3940 
static void GenerateVarHandleArrayChecks(HInvoke* invoke,
                                         CodeGeneratorARMVIXL* codegen,
                                         VarHandleSlowPathARMVIXL* slow_path) {
3944   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3945   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
3946   vixl32::Register object = InputRegisterAt(invoke, 1);
3947   vixl32::Register index = InputRegisterAt(invoke, 2);
3948   DataType::Type value_type =
3949       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3950   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3951 
3952   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3953   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3954   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3955   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3956   const MemberOffset class_offset = mirror::Object::ClassOffset();
3957   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3958 
3959   // Null-check the object.
3960   __ Cmp(object, 0);
3961   __ B(eq, slow_path->GetEntryLabel());
3962 
3963   // Use the offset temporary register. It is not used yet at this point.
3964   vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
3965 
3966   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3967   vixl32::Register temp2 = temps.Acquire();
3968 
3969   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3970   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3971   // coordinateType0 shall not be null but we do not explicitly verify that.
3972   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
3973   __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
3974   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3975   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3976   __ Cmp(temp2, 0);
3977   __ B(eq, slow_path->GetEntryLabel());
3978 
3979   // Check object class against componentType0.
3980   //
3981   // This is an exact check and we defer other cases to the runtime. This includes
3982   // conversion to array of superclass references, which is valid but subsequently
3983   // requires all update operations to check that the value can indeed be stored.
3984   // We do not want to perform such extra checks in the intrinsified code.
3985   //
3986   // We do this check without read barrier, so there can be false negatives which we
3987   // defer to the slow path. There shall be no false negatives for array classes in the
3988   // boot image (including Object[] and primitive arrays) because they are non-movable.
3989   __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
3990   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3991   __ Cmp(temp, temp2);
3992   __ B(ne, slow_path->GetEntryLabel());
3993 
3994   // Check that the coordinateType0 is an array type. We do not need a read barrier
3995   // for loading constant reference fields (or chains of them) for comparison with null,
3996   // nor for finally loading a constant primitive field (primitive type) below.
3997   __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
3998   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3999   __ Cmp(temp2, 0);
4000   __ B(eq, slow_path->GetEntryLabel());
4001 
4002   // Check that the array component type matches the primitive type.
4003   // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4004   // we shall check for a byte array view in the slow path.
4005   // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4006   // so we cannot emit that if we're JITting without boot image.
4007   bool boot_image_available =
4008       codegen->GetCompilerOptions().IsBootImage() ||
4009       !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4010   DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
4011   size_t can_be_view =
4012       ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4013       boot_image_available;
4014   vixl32::Label* slow_path_label =
4015       can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4016   __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4017   __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4018   __ B(ne, slow_path_label);
4019 
4020   // Check for array index out of bounds.
4021   __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4022   __ Cmp(index, temp);
4023   __ B(hs, slow_path->GetEntryLabel());
4024 }
4025 
static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
                                              CodeGeneratorARMVIXL* codegen,
                                              VarHandleSlowPathARMVIXL* slow_path) {
4029   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4030   if (expected_coordinates_count == 0u) {
4031     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4032   } else if (expected_coordinates_count == 1u) {
4033     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4034   } else {
4035     DCHECK_EQ(expected_coordinates_count, 2u);
4036     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4037   }
4038 }
4039 
static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
                                                         CodeGeneratorARMVIXL* codegen,
                                                         std::memory_order order,
                                                         DataType::Type type) {
4044   VarHandleSlowPathARMVIXL* slow_path =
4045       new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4046   codegen->AddSlowPath(slow_path);
4047 
4048   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4049   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4050 
4051   return slow_path;
4052 }
4053 
4054 struct VarHandleTarget {
4055   vixl32::Register object;  // The object holding the value to operate on.
4056   vixl32::Register offset;  // The offset of the value to operate on.
4057 };
4058 
static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4060   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4061   LocationSummary* locations = invoke->GetLocations();
4062 
4063   VarHandleTarget target;
4064   // The temporary allocated for loading the offset.
4065   target.offset = RegisterFrom(locations->GetTemp(0u));
4066   // The reference to the object that holds the value to operate on.
4067   target.object = (expected_coordinates_count == 0u)
4068       ? RegisterFrom(locations->GetTemp(1u))
4069       : InputRegisterAt(invoke, 1);
4070   return target;
4071 }
4072 
static void GenerateVarHandleTarget(HInvoke* invoke,
                                    const VarHandleTarget& target,
                                    CodeGeneratorARMVIXL* codegen) {
4076   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4077   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4078   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4079 
4080   if (expected_coordinates_count <= 1u) {
4081     // For static fields, we need to fill the `target.object` with the declaring class,
4082     // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
4083     // we do not need the declaring class, so we can forget the `ArtMethod*` when
4084     // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
4085     vixl32::Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
4086 
4087     const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4088     const MemberOffset offset_offset = ArtField::OffsetOffset();
4089 
4090     // Load the ArtField, the offset and, if needed, declaring class.
4091     __ Ldr(method, MemOperand(varhandle, art_field_offset.Int32Value()));
4092     __ Ldr(target.offset, MemOperand(method, offset_offset.Int32Value()));
4093     if (expected_coordinates_count == 0u) {
4094       codegen->GenerateGcRootFieldLoad(invoke,
4095                                        LocationFrom(target.object),
4096                                        method,
4097                                        ArtField::DeclaringClassOffset().Int32Value(),
4098                                        kCompilerReadBarrierOption);
4099     }
4100   } else {
4101     DCHECK_EQ(expected_coordinates_count, 2u);
4102     DataType::Type value_type =
4103         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4104     uint32_t size_shift = DataType::SizeShift(value_type);
4105     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4106 
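    // Compute the offset of the array element: data offset + (index << size_shift).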
4107     vixl32::Register index = InputRegisterAt(invoke, 2);
4108     vixl32::Register shifted_index = index;
4109     if (size_shift != 0u) {
4110       shifted_index = target.offset;
4111       __ Lsl(shifted_index, index, size_shift);
4112     }
4113     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4114   }
4115 }
4116 
static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
4118   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4119   if (expected_coordinates_count > 2u) {
4120     // Invalid coordinate count. This invoke shall throw at runtime.
4121     return false;
4122   }
4123   if (expected_coordinates_count != 0u &&
4124       invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
4125     // Except for static fields (no coordinates), the first coordinate must be a reference.
4126     return false;
4127   }
4128   if (expected_coordinates_count == 2u) {
4129     // For arrays and views, the second coordinate must be convertible to `int`.
4130     // In this context, `boolean` is not convertible but we have to look at the shorty
4131     // as compiler transformations can give the invoke a valid boolean input.
4132     DataType::Type index_type = GetDataTypeFromShorty(invoke, 2);
4133     if (index_type == DataType::Type::kBool ||
4134         DataType::Kind(index_type) != DataType::Type::kInt32) {
4135       return false;
4136     }
4137   }
4138 
4139   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4140   DataType::Type return_type = invoke->GetType();
4141   mirror::VarHandle::AccessModeTemplate access_mode_template =
4142       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4143   switch (access_mode_template) {
4144     case mirror::VarHandle::AccessModeTemplate::kGet:
4145       // The return type should be the same as varType, so it shouldn't be void.
4146       if (return_type == DataType::Type::kVoid) {
4147         return false;
4148       }
4149       break;
4150     case mirror::VarHandle::AccessModeTemplate::kSet:
4151       if (return_type != DataType::Type::kVoid) {
4152         return false;
4153       }
4154       break;
4155     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
4156       if (return_type != DataType::Type::kBool) {
4157         return false;
4158       }
4159       uint32_t expected_value_index = number_of_arguments - 2;
4160       uint32_t new_value_index = number_of_arguments - 1;
4161       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4162       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4163       if (expected_value_type != new_value_type) {
4164         return false;
4165       }
4166       break;
4167     }
4168     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4169       uint32_t expected_value_index = number_of_arguments - 2;
4170       uint32_t new_value_index = number_of_arguments - 1;
4171       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4172       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4173       if (expected_value_type != new_value_type || return_type != expected_value_type) {
4174         return false;
4175       }
4176       break;
4177     }
4178     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4179       DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
4180       if (IsVarHandleGetAndAdd(invoke) &&
4181           (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
4182         // We should only add numerical types.
4183         return false;
4184       } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
4185         // We can only apply operators to bitwise integral types.
4186         // Note that bitwise VarHandle operations accept a non-integral boolean type and
4187         // perform the appropriate logical operation. However, the result is the same as
4188         // using the bitwise operation on our boolean representation and this fits well
4189         // with DataType::IsIntegralType() treating the compiler type kBool as integral.
4190         return false;
4191       }
4192       if (value_type != return_type) {
4193         return false;
4194       }
4195       break;
4196     }
4197   }
4198 
4199   return true;
4200 }
4201 
static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4203   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4204   DataType::Type return_type = invoke->GetType();
4205 
4206   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4207   LocationSummary* locations =
4208       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4209   locations->SetInAt(0, Location::RequiresRegister());
4210   // Require coordinates in registers. These are the object holding the value
4211   // to operate on (except for static fields) and index (for arrays and views).
4212   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4213     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4214   }
4215   if (return_type != DataType::Type::kVoid) {
4216     if (DataType::IsFloatingPointType(return_type)) {
4217       locations->SetOut(Location::RequiresFpuRegister());
4218     } else {
4219       locations->SetOut(Location::RequiresRegister());
4220     }
4221   }
4222   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4223   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4224   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4225     HInstruction* arg = invoke->InputAt(arg_index);
4226     if (DataType::IsFloatingPointType(arg->GetType())) {
4227       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4228     } else {
4229       locations->SetInAt(arg_index, Location::RequiresRegister());
4230     }
4231   }
4232 
4233   // Add a temporary for offset.
4234   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4235       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4236     // To preserve the offset value across the non-Baker read barrier slow path
4237     // for loading the declaring class, use a fixed callee-save register.
4238     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4239     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4240   } else {
4241     locations->AddTemp(Location::RequiresRegister());
4242   }
4243   if (expected_coordinates_count == 0u) {
4244     // Add a temporary to hold the declaring class.
4245     locations->AddTemp(Location::RequiresRegister());
4246   }
4247 
4248   return locations;
4249 }
4250 
static void CreateVarHandleGetLocations(HInvoke* invoke,
                                        CodeGeneratorARMVIXL* codegen,
                                        bool atomic) {
4254   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4255     return;
4256   }
4257 
4258   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4259       invoke->GetType() == DataType::Type::kReference &&
4260       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4261       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4262     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4263     // the passed reference and reloads it from the field. This gets the memory visibility
4264     // wrong for Acquire/Volatile operations. b/173104084
4265     return;
4266   }
4267 
4268   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4269 
4270   DataType::Type type = invoke->GetType();
4271   if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4272     // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4273     // declaring class (if present) and offset temporary.
4274     DCHECK_EQ(locations->GetTempCount(),
4275               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4276     locations->AddRegisterTemps(3u - locations->GetTempCount());
4277   }
4278 }
4279 
static void GenerateVarHandleGet(HInvoke* invoke,
                                 CodeGeneratorARMVIXL* codegen,
                                 std::memory_order order,
                                 bool atomic,
                                 bool byte_swap = false) {
4285   DataType::Type type = invoke->GetType();
4286   DCHECK_NE(type, DataType::Type::kVoid);
4287 
4288   LocationSummary* locations = invoke->GetLocations();
4289   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4290   Location out = locations->Out();
4291 
4292   VarHandleTarget target = GetVarHandleTarget(invoke);
4293   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4294   if (!byte_swap) {
4295     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4296     slow_path->SetAtomic(atomic);
4297     GenerateVarHandleTarget(invoke, target, codegen);
4298     __ Bind(slow_path->GetNativeByteOrderLabel());
4299   }
4300 
4301   Location maybe_temp = Location::NoLocation();
4302   Location maybe_temp2 = Location::NoLocation();
4303   Location maybe_temp3 = Location::NoLocation();
4304   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
4305     // Reuse the offset temporary.
4306     maybe_temp = LocationFrom(target.offset);
4307   } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4308     // Reuse the offset temporary and declaring class (if present).
4309     // The address shall be constructed in the scratch register before they are clobbered.
4310     maybe_temp = LocationFrom(target.offset);
4311     DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4312     if (type == DataType::Type::kFloat64) {
4313       maybe_temp2 = locations->GetTemp(1);
4314       maybe_temp3 = locations->GetTemp(2);
4315     }
4316   }
4317 
4318   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4319   Location loaded_value = out;
4320   DataType::Type load_type = type;
4321   if (byte_swap) {
4322     if (type == DataType::Type::kFloat64) {
4323       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4324         // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4325         loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4326         maybe_temp2 = Location::NoLocation();
4327         maybe_temp3 = Location::NoLocation();
4328       } else {
4329         // Use the offset temporary and the scratch register.
4330         loaded_value = LocationFrom(target.offset, temps.Acquire());
4331       }
4332       load_type = DataType::Type::kInt64;
4333     } else if (type == DataType::Type::kFloat32) {
4334       // Reuse the offset temporary.
4335       loaded_value = LocationFrom(target.offset);
4336       load_type = DataType::Type::kInt32;
4337     } else if (type == DataType::Type::kInt64) {
4338       // Swap the high and low registers and reverse the bytes in each after the load.
4339       loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4340     }
4341   }
4342 
4343   GenerateIntrinsicGet(invoke,
4344                        codegen,
4345                        load_type,
4346                        order,
4347                        atomic,
4348                        target.object,
4349                        target.offset,
4350                        loaded_value,
4351                        maybe_temp,
4352                        maybe_temp2,
4353                        maybe_temp3);
4354   if (byte_swap) {
4355     if (type == DataType::Type::kInt64) {
4356       GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4357     } else {
4358       GenerateReverseBytes(assembler, type, loaded_value, out);
4359     }
4360   }
4361 
4362   if (!byte_swap) {
4363     __ Bind(slow_path->GetExitLabel());
4364   }
4365 }
4366 
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
  CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
  GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
  CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
  GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
  CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
  GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
  CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
  GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
}
4398 
static void CreateVarHandleSetLocations(HInvoke* invoke,
                                        CodeGeneratorARMVIXL* codegen,
                                        bool atomic) {
4402   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4403     return;
4404   }
4405 
4406   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4407 
4408   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4409   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4410   if (DataType::Is64BitType(value_type)) {
4411     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4412     DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4413     HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4414     bool has_reverse_bytes_slow_path =
4415         (expected_coordinates_count == 2u) &&
4416         !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern());
4417     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4418       // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4419       // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4420       // We can reuse the offset temporary and declaring class (if present).
4421       size_t temps_needed = has_reverse_bytes_slow_path
4422           ? 4u
4423           : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4424       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4425     } else if (has_reverse_bytes_slow_path) {
4426       // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4427       // We can reuse the offset temporary.
4428       DCHECK_EQ(locations->GetTempCount(), 1u);
4429       locations->AddTemp(Location::RequiresRegister());
4430     }
4431   }
4432 }
4433 
static void GenerateVarHandleSet(HInvoke* invoke,
                                 CodeGeneratorARMVIXL* codegen,
                                 std::memory_order order,
                                 bool atomic,
                                 bool byte_swap = false) {
4439   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4440   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4441 
4442   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4443   LocationSummary* locations = invoke->GetLocations();
4444   Location value = locations->InAt(value_index);
4445 
4446   VarHandleTarget target = GetVarHandleTarget(invoke);
4447   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4448   if (!byte_swap) {
4449     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4450     slow_path->SetAtomic(atomic);
4451     GenerateVarHandleTarget(invoke, target, codegen);
4452     __ Bind(slow_path->GetNativeByteOrderLabel());
4453   }
4454 
4455   Location maybe_temp = Location::NoLocation();
4456   Location maybe_temp2 = Location::NoLocation();
4457   Location maybe_temp3 = Location::NoLocation();
4458   if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4459     // Reuse the offset temporary and declaring class (if present).
4460     // The address shall be constructed in the scratch register before they are clobbered.
4461     maybe_temp = locations->GetTemp(0);
4462     maybe_temp2 = locations->GetTemp(1);
4463     if (value_type == DataType::Type::kFloat64) {
4464       maybe_temp3 = locations->GetTemp(2);
4465     }
4466   }
4467 
4468   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4469   if (byte_swap) {
4470     if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4471       // Calculate the address in scratch register, so that we can use the offset temporary.
4472       vixl32::Register base = temps.Acquire();
4473       __ Add(base, target.object, target.offset);
4474       target.object = base;
4475       target.offset = vixl32::Register();
4476     }
4477     Location original_value = value;
4478     if (DataType::Is64BitType(value_type)) {
4479       size_t temp_start = 0u;
4480       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4481         // Clear `maybe_temp3` which was initialized above for Float64.
4482         DCHECK(value_type != DataType::Type::kFloat64 || maybe_temp3.Equals(locations->GetTemp(2)));
4483         maybe_temp3 = Location::NoLocation();
4484         temp_start = 2u;
4485       }
4486       value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4487                            RegisterFrom(locations->GetTemp(temp_start + 1u)));
4488       if (value_type == DataType::Type::kFloat64) {
4489         __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4490         GenerateReverseBytesInPlaceForEachWord(assembler, value);
4491         value_type = DataType::Type::kInt64;
4492       } else {
4493         GenerateReverseBytes(assembler, value_type, original_value, value);
4494       }
4495     } else if (value_type == DataType::Type::kFloat32) {
4496       value = locations->GetTemp(0);  // Use the offset temporary which was freed above.
4497       __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4498       GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4499       value_type = DataType::Type::kInt32;
4500     } else {
4501       value = LocationFrom(temps.Acquire());
4502       GenerateReverseBytes(assembler, value_type, original_value, value);
4503     }
4504   }
4505 
4506   GenerateIntrinsicSet(codegen,
4507                        value_type,
4508                        order,
4509                        atomic,
4510                        target.object,
4511                        target.offset,
4512                        value,
4513                        maybe_temp,
4514                        maybe_temp2,
4515                        maybe_temp3);
4516 
4517   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4518     // Reuse the offset temporary for MarkGCCard.
4519     vixl32::Register temp = target.offset;
4520     vixl32::Register card = temps.Acquire();
4521     vixl32::Register value_reg = RegisterFrom(value);
4522     codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true);
4523   }
4524 
4525   if (!byte_swap) {
4526     __ Bind(slow_path->GetExitLabel());
4527   }
4528 }
4529 
VisitVarHandleSet(HInvoke * invoke)4530 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4531   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4532 }
4533 
VisitVarHandleSet(HInvoke * invoke)4534 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4535   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4536 }
4537 
VisitVarHandleSetOpaque(HInvoke * invoke)4538 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4539   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4540 }
4541 
VisitVarHandleSetOpaque(HInvoke * invoke)4542 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4543   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4544 }
4545 
VisitVarHandleSetRelease(HInvoke * invoke)4546 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4547   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4548 }
4549 
VisitVarHandleSetRelease(HInvoke * invoke)4550 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4551   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
4552 }
4553 
VisitVarHandleSetVolatile(HInvoke * invoke)4554 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4555   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4556 }
4557 
VisitVarHandleSetVolatile(HInvoke * invoke)4558 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4559   // ARM store-release instructions are implicitly sequentially consistent.
4560   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4561 }
4562 
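// Build the locations for the VarHandle compareAndSet, weakCompareAndSet* and
// compareAndExchange* intrinsics. `return_success` is true for the boolean-returning
// compare-and-set family and false for compareAndExchange*, which returns the old value.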
static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
  if (!HasVarHandleIntrinsicImplementation(invoke)) {
    return;
  }

  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
  DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
      value_type == DataType::Type::kReference) {
    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
    // the passed reference and reloads it from the field. This breaks the read barriers
    // in slow path in different ways. The marked old value may not actually be a to-space
    // reference to the same object as `old_value`, breaking slow path assumptions. And
    // for CompareAndExchange, marking the old value after comparison failure may actually
    // return the reference to `expected`, erroneously indicating success even though we
    // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
    return;
  }

  LocationSummary* locations = CreateVarHandleCommonLocations(invoke);

  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    // We need callee-save registers for both the class object and offset instead of
    // the temporaries reserved in CreateVarHandleCommonLocations().
    static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
    constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
    constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
    if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
      DCHECK_EQ(locations->GetTempCount(), 2u);
      DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
      DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
      locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
    } else {
      DCHECK_EQ(locations->GetTempCount(), 1u);
      DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
      locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
    }
  }

  if (DataType::IsFloatingPointType(value_type)) {
    // We can reuse the declaring class (if present) and offset temporary.
    DCHECK_EQ(locations->GetTempCount(),
              (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
    size_t temps_needed = (value_type == DataType::Type::kFloat64)
        ? (return_success ? 5u : 7u)
        : (return_success ? 3u : 4u);
    locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
  } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
    // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
    DCHECK_EQ(locations->GetTempCount(), 1u);
    if (value_type == DataType::Type::kInt64) {
      // We would ideally add 4 temps for Int64 but that would simply run out of registers,
      // so we instead need to reverse bytes in actual arguments and undo it at the end.
    } else {
      locations->AddRegisterTemps(2u);
    }
  }
  if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    // Add a temporary for store result, also used for the `old_value_temp` in slow path.
    locations->AddTemp(Location::RequiresRegister());
  }
}

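// Generate code for the VarHandle compare-and-set and compare-and-exchange operations.
// `strong` selects a strong CAS loop (required when the old value is returned); `byte_swap`
// is set when called from the byte array view slow path for a non-native byte order.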
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
                                                     CodeGeneratorARMVIXL* codegen,
                                                     std::memory_order order,
                                                     bool return_success,
                                                     bool strong,
                                                     bool byte_swap = false) {
  DCHECK(return_success || strong);

  uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
  uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
  DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location expected = locations->InAt(expected_index);
  Location new_value = locations->InAt(new_value_index);
  Location out = locations->Out();

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARMVIXL* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
    GenerateVarHandleTarget(invoke, target, codegen);
    __ Bind(slow_path->GetNativeByteOrderLabel());
  }

  bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
  bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);

  if (release_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
  }

  // Calculate the pointer to the value.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register tmp_ptr = temps.Acquire();
  __ Add(tmp_ptr, target.object, target.offset);

  // Move floating point values to temporaries and prepare output registers.
  // Note that float/double CAS uses bitwise comparison, rather than the operator==.
  // Reuse the declaring class (if present) and offset temporary for non-reference types,
  // the address has already been constructed in the scratch register. We are more careful
  // for references due to read and write barrier, see below.
  Location old_value;
  vixl32::Register store_result;
  vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
  DataType::Type cas_type = value_type;
  if (value_type == DataType::Type::kFloat64) {
    vixl32::DRegister expected_vreg = DRegisterFrom(expected);
    vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
    expected =
        LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
    new_value =
        LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
    store_result = RegisterFrom(locations->GetTemp(4));
    old_value = return_success
        ? LocationFrom(success, store_result)
        : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
    if (byte_swap) {
      __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
      __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
      GenerateReverseBytesInPlaceForEachWord(assembler, expected);
      GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
    } else {
      __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
      __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
    }
    cas_type = DataType::Type::kInt64;
  } else if (value_type == DataType::Type::kFloat32) {
    vixl32::SRegister expected_vreg = SRegisterFrom(expected);
    vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
    expected = locations->GetTemp(0);
    new_value = locations->GetTemp(1);
    store_result = RegisterFrom(locations->GetTemp(2));
    old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
    __ Vmov(RegisterFrom(expected), expected_vreg);
    __ Vmov(RegisterFrom(new_value), new_value_vreg);
    if (byte_swap) {
      GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
      GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
    }
    cas_type = DataType::Type::kInt32;
  } else if (value_type == DataType::Type::kInt64) {
    store_result = RegisterFrom(locations->GetTemp(0));
    old_value = return_success
        ? LocationFrom(success, store_result)
        // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
        : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
    if (byte_swap) {
      // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
      GenerateReverseBytesInPlaceForEachWord(assembler, expected);
      expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
      GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
      new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
    }
  } else {
    // Use the last temp. For references with read barriers, this is an extra temporary
    // allocated to avoid overwriting the temporaries for declaring class (if present)
    // and offset as they are needed in the slow path. Otherwise, this is the offset
    // temporary which also works for references without read barriers that need the
    // object register preserved for the write barrier.
    store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
    old_value = return_success ? LocationFrom(store_result) : out;
    if (byte_swap) {
      DCHECK_EQ(locations->GetTempCount(), 3u);
      Location original_expected = expected;
      Location original_new_value = new_value;
      expected = locations->GetTemp(0);
      new_value = locations->GetTemp(1);
      GenerateReverseBytes(assembler, value_type, original_expected, expected);
      GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
    }
  }

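  // The CAS loop below branches to `cmp_failure` when the comparison fails: without a read
  // barrier that is simply the loop exit, while for references with a read barrier it is the
  // entry of the ReadBarrierCasSlowPathARMVIXL set up below.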
  vixl32::Label exit_loop_label;
  vixl32::Label* exit_loop = &exit_loop_label;
  vixl32::Label* cmp_failure = &exit_loop_label;

  if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
    // reloaded old value for subsequent CAS in the slow path.
    vixl32::Register old_value_temp = store_result;
    ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
        new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
            invoke,
            strong,
            target.object,
            target.offset,
            RegisterFrom(expected),
            RegisterFrom(new_value),
            RegisterFrom(old_value),
            old_value_temp,
            store_result,
            success,
            codegen);
    codegen->AddSlowPath(rb_slow_path);
    exit_loop = rb_slow_path->GetExitLabel();
    cmp_failure = rb_slow_path->GetEntryLabel();
  }

  GenerateCompareAndSet(codegen,
                        cas_type,
                        strong,
                        cmp_failure,
                        /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
                        tmp_ptr,
                        expected,
                        new_value,
                        old_value,
                        store_result,
                        success);
  __ Bind(exit_loop);

  if (acquire_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
  }

  if (!return_success) {
    if (byte_swap) {
      if (value_type == DataType::Type::kInt64) {
        GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
        // Undo byte swapping in `expected` and `new_value`. We do not have the
        // information whether the value in these registers shall be needed later.
        GenerateReverseBytesInPlaceForEachWord(assembler, expected);
        GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
      } else {
        GenerateReverseBytes(assembler, value_type, old_value, out);
      }
    } else if (value_type == DataType::Type::kFloat64) {
      __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
    } else if (value_type == DataType::Type::kFloat32) {
      __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
    // Reuse the offset temporary and scratch register for MarkGCCard.
    vixl32::Register temp = target.offset;
    vixl32::Register card = tmp_ptr;
    // Mark card for object assuming new value is stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
  }

  if (!byte_swap) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(
      invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
}

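// Build the locations for the VarHandle getAndSet, getAndAdd and getAndBitwise* intrinsics.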
static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
                                                 GetAndUpdateOp get_and_update_op) {
  if (!HasVarHandleIntrinsicImplementation(invoke)) {
    return;
  }

  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
      invoke->GetType() == DataType::Type::kReference) {
    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
    // the passed reference and reloads it from the field, thus seeing the new value
    // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
    return;
  }

  LocationSummary* locations = CreateVarHandleCommonLocations(invoke);

  // We can reuse the declaring class (if present) and offset temporary, except for
  // non-Baker read barriers that need them for the slow path.
  DCHECK_EQ(locations->GetTempCount(),
            (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);

  DataType::Type value_type = invoke->GetType();
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    if (DataType::IsFloatingPointType(value_type)) {
      // Add temps needed to do the GenerateGetAndUpdate() with core registers.
      size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
      locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
               value_type == DataType::Type::kReference) {
      // We need to preserve the declaring class (if present) and offset for read barrier
      // slow paths, so we must use a separate temporary for the exclusive store result.
      locations->AddTemp(Location::RequiresRegister());
    } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      DCHECK_EQ(locations->GetTempCount(), 1u);
      locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
    }
  } else {
    // We need temporaries for the new value and exclusive store result.
    size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
    if (get_and_update_op != GetAndUpdateOp::kAdd &&
        GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
      // Add temps for the byte-reversed `arg` in the byte array view slow path.
      if (value_type == DataType::Type::kInt64) {
        // We would ideally add 2 temps for Int64 but that would simply run out of registers,
        // so we instead need to reverse bytes in the actual argument and undo it at the end.
      } else {
        temps_needed += 1u;
      }
    }
    locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
    if (DataType::IsFloatingPointType(value_type)) {
      // Note: This shall allocate a D register. There is no way to request an S register.
      locations->AddTemp(Location::RequiresFpuRegister());
    }
  }
}

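// Generate code for the VarHandle getAndSet, getAndAdd and getAndBitwise* operations.
// The operation is selected by `get_and_update_op`; `byte_swap` is set when called from
// the byte array view slow path for a non-native byte order.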
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
                                          CodeGeneratorARMVIXL* codegen,
                                          GetAndUpdateOp get_and_update_op,
                                          std::memory_order order,
                                          bool byte_swap = false) {
  uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location arg = locations->InAt(arg_index);
  Location out = locations->Out();

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARMVIXL* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    slow_path->SetGetAndUpdateOp(get_and_update_op);
    GenerateVarHandleTarget(invoke, target, codegen);
    __ Bind(slow_path->GetNativeByteOrderLabel());
  }

  bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
  bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);

  if (release_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
  }

  // Use the scratch register for the pointer to the target location.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register tmp_ptr = temps.Acquire();
  __ Add(tmp_ptr, target.object, target.offset);

  // Use the offset temporary for the exclusive store result.
  vixl32::Register store_result = target.offset;

  // The load/store type is never floating point.
  DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
      ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
      : value_type;

  // Prepare register for old value and temporaries if any.
  Location old_value = out;
  Location maybe_temp = Location::NoLocation();
  Location maybe_vreg_temp = Location::NoLocation();
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
    // rather than moving between core and FP registers in the loop.
    if (value_type == DataType::Type::kFloat64) {
      vixl32::DRegister arg_vreg = DRegisterFrom(arg);
      DCHECK_EQ(locations->GetTempCount(), 5u);  // `store_result` and the four here.
      old_value =
          LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
      arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
      if (byte_swap) {
        __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
      } else {
        __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
      }
    } else if (value_type == DataType::Type::kFloat32) {
      vixl32::SRegister arg_vreg = SRegisterFrom(arg);
      DCHECK_EQ(locations->GetTempCount(), 3u);  // `store_result` and the two here.
      old_value = locations->GetTemp(1);
      arg = locations->GetTemp(2);
      __ Vmov(RegisterFrom(arg), arg_vreg);
      if (byte_swap) {
        GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
      }
    } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
      if (kUseBakerReadBarrier) {
        // Load the old value initially to a temporary register.
        // We shall move it to `out` later with a read barrier.
        old_value = LocationFrom(store_result);
        store_result = RegisterFrom(out);  // Use the `out` for the exclusive store result.
      } else {
        // The store_result is a separate temporary.
        DCHECK(!store_result.Is(target.object));
        DCHECK(!store_result.Is(target.offset));
      }
    } else if (byte_swap) {
      Location original_arg = arg;
      arg = locations->GetTemp(1);
      if (value_type == DataType::Type::kInt64) {
        arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
      }
      GenerateReverseBytes(assembler, value_type, original_arg, arg);
    }
  } else {
    maybe_temp = DataType::Is64BitType(value_type)
        ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
        : locations->GetTemp(1);
    DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
    if (DataType::IsFloatingPointType(value_type)) {
      maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
      DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
    }
    if (byte_swap) {
      if (get_and_update_op == GetAndUpdateOp::kAdd) {
        // We need to do the byte swapping in the CAS loop for GetAndAdd.
        get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
      } else if (value_type == DataType::Type::kInt64) {
        // Swap the high/low regs and reverse the bytes in each after the load.
        old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
        // Due to lack of registers, reverse bytes in `arg` and undo that later.
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
        arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
      } else {
        DCHECK(!DataType::IsFloatingPointType(value_type));
        Location original_arg = arg;
        arg = locations->GetTemp(2);
        DCHECK(!arg.Contains(LocationFrom(store_result)));
        GenerateReverseBytes(assembler, value_type, original_arg, arg);
      }
    }
  }

  GenerateGetAndUpdate(codegen,
                       get_and_update_op,
                       load_store_type,
                       tmp_ptr,
                       arg,
                       old_value,
                       store_result,
                       maybe_temp,
                       maybe_vreg_temp);

  if (acquire_barrier) {
    codegen->GenerateMemoryBarrier(
        seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
  }

  if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
    if (value_type == DataType::Type::kInt64) {
      GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
      if (get_and_update_op != GetAndUpdateOp::kSet) {
        // Undo byte swapping in `arg`. We do not have the information
        // whether the value in these registers shall be needed later.
        GenerateReverseBytesInPlaceForEachWord(assembler, arg);
      }
    } else {
      GenerateReverseBytes(assembler, value_type, old_value, out);
    }
  } else if (get_and_update_op == GetAndUpdateOp::kSet &&
             DataType::IsFloatingPointType(value_type)) {
    if (value_type == DataType::Type::kFloat64) {
      __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
    } else {
      __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
    }
  } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    if (kUseBakerReadBarrier) {
      codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
                                                            RegisterFrom(old_value));
    } else {
      codegen->GenerateReadBarrierSlow(
          invoke,
          Location::RegisterLocation(RegisterFrom(out).GetCode()),
          Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
          Location::RegisterLocation(target.object.GetCode()),
          /*offset=*/ 0u,
          /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
    // Reuse the offset temporary and scratch register for MarkGCCard.
    vixl32::Register temp = target.offset;
    vixl32::Register card = tmp_ptr;
    // Mark card for object assuming new value is stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
  }

  if (!byte_swap) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}

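// Slow path code for VarHandles that require a byte array view check. It verifies that the
// VarHandle is really a ByteArrayViewVarHandle, performs the bounds, alignment and byte
// order checks, and then either returns to the main path (native byte order) or re-emits
// the access with `byte_swap=true`.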
void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
  CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
  vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
  vixl32::Register varhandle = InputRegisterAt(invoke, 0);
  vixl32::Register object = InputRegisterAt(invoke, 1);
  vixl32::Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    // Use the offset temporary register. It is not used yet at this point.
    vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register temp2 = temps.Acquire();

    // The main path checked that the coordinateType0 is an array class that matches
    // the class of the actual coordinate argument but it does not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(ne, GetEntryLabel());

    // Check for array index out of bounds.
    __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
    if (!temp.IsLow()) {
      // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
      __ Mov(temp2, size_operand);
    }
    __ Subs(temp, temp, index);
    {
      // Use ExactAssemblyScope here because we are using IT.
      ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                  2 * k16BitT32InstructionSizeInBytes);
      __ it(hs);
      if (temp.IsLow()) {
        __ cmp(hs, temp, size_operand);
      } else {
        __ cmp(hs, temp, temp2);
      }
    }
    __ B(lo, GetEntryLabel());
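    // Here the flags from the SUBS/IT CMP sequence guarantee that `index <= length` and
    // `length - index >= size`, i.e. the `size`-byte access at `index` is within bounds.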

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
    __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
    __ B(ne, GetEntryLabel());

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
      HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
      if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) {
        // There is no reason to differentiate between native byte order and byte-swap
        // for setting a zero bit pattern. Just return to the main path.
        __ B(GetNativeByteOrderLabel());
        return;
      }
    }
    __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cmp(temp2, 0);
    __ B(ne, GetNativeByteOrderLabel());
  }

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

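// The intrinsics listed below have no special code generation for ARM; the corresponding
// invokes fall back to the generic code path.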
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke)

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art