/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
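  // Note: an Fmov between a general-purpose and an FP register moves the raw bit pattern,
  // which is exactly what the *RawBits / *BitsTo* conversions require.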
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
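    // For the signed 16-bit case below, the byte swap is followed by a sign extension so the
    // result matches Java `short` semantics (Rev16 alone leaves a zero-extended value).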
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

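  // RBIT reverses the bit order, so the subsequent CLZ effectively counts the trailing zeros
  // of the original value.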
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

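  // The base A64 ISA has no scalar popcount instruction, so route the value through a SIMD
  // register: Cnt counts the set bits in each byte and Addv sums the per-byte counts.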
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

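  // Compute dst = (1 << high_bit) >> CLZ(src). The Bic handles src == 0: only then does the
  // CLZ result (32 or 64) have bit `clz_high_bit` set, and the shifted Bic maps that bit onto
  // the top bit of dst, clearing it so the final result is 0. E.g. for a 32-bit src of 0x00f0,
  // CLZ is 24 and dst becomes 0x80.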
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

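  // Isolate the lowest set bit with the identity `x & -x`, e.g. 0b10100 & -0b10100 == 0b00100.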
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS if the input is a negative half value.
  //
  // The reason why we didn't choose the FCVTPS instruction here is that although it performs
  // rounding toward positive infinity, it doesn't perform rounding to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using this instruction, for most inputs, more handling code would be needed.
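  //
  // Illustrative negative-tie case: Math.round(-2.5) must be -2 (ties toward positive
  // infinity), but FCVTAS(-2.5) yields -3 (ties away from zero); the fix-up below detects
  // the tie and increments the result by one.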
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

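    // Volatile and ordered puts both take the store-release path below; a plain store is
    // used otherwise. (On ARMv8 a release store also covers the weaker "ordered" semantics.)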
    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
                              DataType::Type type,
                              Register ptr,
                              Register old_value,
                              bool use_load_acquire) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (use_load_acquire) {
        __ Ldaxrb(old_value, MemOperand(ptr));
      } else {
        __ Ldxrb(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (use_load_acquire) {
        __ Ldaxrh(old_value, MemOperand(ptr));
      } else {
        __ Ldxrh(old_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kReference:
      if (use_load_acquire) {
        __ Ldaxr(old_value, MemOperand(ptr));
      } else {
        __ Ldxr(old_value, MemOperand(ptr));
      }
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
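  // The exclusive loads above zero-extend narrow values; sign-extend the signed narrow types
  // and unpoison references so later comparisons see the canonical register representation.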
  switch (type) {
    case DataType::Type::kInt8:
      __ Sxtb(old_value, old_value);
      break;
    case DataType::Type::kInt16:
      __ Sxth(old_value, old_value);
      break;
    case DataType::Type::kReference:
      assembler->MaybeUnpoisonHeapReference(old_value);
      break;
    default:
      break;
  }
}

static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
                               DataType::Type type,
                               Register ptr,
                               Register store_result,
                               Register new_value,
                               bool use_store_release) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(new_value);
  }
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (use_store_release) {
        __ Stlxrb(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxrb(store_result, new_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (use_store_release) {
        __ Stlxrh(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxrh(store_result, new_value, MemOperand(ptr));
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kReference:
      if (use_store_release) {
        __ Stlxr(store_result, new_value, MemOperand(ptr));
      } else {
        __ Stxr(store_result, new_value, MemOperand(ptr));
      }
      break;
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(new_value);
  }
}

static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
                                  DataType::Type type,
                                  std::memory_order order,
                                  bool strong,
                                  vixl::aarch64::Label* cmp_failure,
                                  Register ptr,
                                  Register new_value,
                                  Register old_value,
                                  Register store_result,
                                  Register expected,
                                  Register expected2 = Register()) {
  // The `expected2` register is valid only for the reference slow path and represents the
  // unmarked old value from the main path's attempt to emit the CAS when the marked old
  // value matched `expected`.
  DCHECK(type == DataType::Type::kReference || !expected2.IsValid());

  DCHECK(ptr.IsX());
  DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
  DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
  DCHECK(store_result.IsW());
  DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
  DCHECK(!expected2.IsValid() || expected2.IsW());

  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();

  bool use_load_acquire =
      (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
  bool use_store_release =
      (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
  DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);
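  // Memory order mapping for the exclusive pair: seq_cst uses both LDAXR and STLXR, acquire
  // only the load-acquire, release only the store-release, and relaxed the plain LDXR/STXR.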
1095
1096 // repeat: {
1097 // old_value = [ptr]; // Load exclusive.
1098 // if (old_value != expected && old_value != expected2) goto cmp_failure;
1099 // store_result = failed([ptr] <- new_value); // Store exclusive.
1100 // }
1101 // if (strong) {
1102 // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
1103 // } else {
1104 // store_result = store_result ^ 1; // Report success as 1, failure as 0.
1105 // }
1106 //
1107 // Flag Z indicates whether `old_value == expected || old_value == expected2`.
1108 // (Is `expected2` is not valid, the `old_value == expected2` part is not emitted.)
1109
1110 vixl::aarch64::Label loop_head;
1111 if (strong) {
1112 __ Bind(&loop_head);
1113 }
1114 EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
1115 __ Cmp(old_value, expected);
1116 if (expected2.IsValid()) {
1117 __ Ccmp(old_value, expected2, ZFlag, ne);
1118 }
1119 // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
1120 // If the comparison succeeded, the Z flag is set and remains set after the end of the
1121 // code emitted here, unless we retry the whole operation.
1122 __ B(cmp_failure, ne);
1123 EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
1124 if (strong) {
1125 __ Cbnz(store_result, &loop_head);
1126 } else {
1127 // Flip the `store_result` register to indicate success by 1 and failure by 0.
1128 __ Eor(store_result, store_result, 1);
1129 }
1130 }
1131
1132 class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1133 public:
ReadBarrierCasSlowPathARM64(HInvoke * invoke,std::memory_order order,bool strong,Register base,Register offset,Register expected,Register new_value,Register old_value,Register old_value_temp,Register store_result,bool update_old_value,CodeGeneratorARM64 * arm64_codegen)1134 ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1135 std::memory_order order,
1136 bool strong,
1137 Register base,
1138 Register offset,
1139 Register expected,
1140 Register new_value,
1141 Register old_value,
1142 Register old_value_temp,
1143 Register store_result,
1144 bool update_old_value,
1145 CodeGeneratorARM64* arm64_codegen)
1146 : SlowPathCodeARM64(invoke),
1147 order_(order),
1148 strong_(strong),
1149 base_(base),
1150 offset_(offset),
1151 expected_(expected),
1152 new_value_(new_value),
1153 old_value_(old_value),
1154 old_value_temp_(old_value_temp),
1155 store_result_(store_result),
1156 update_old_value_(update_old_value),
1157 mark_old_value_slow_path_(nullptr),
1158 update_old_value_slow_path_(nullptr) {
1159 if (!kUseBakerReadBarrier) {
1160 // We need to add the slow path now, it is too late when emitting slow path code.
1161 mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1162 invoke,
1163 Location::RegisterLocation(old_value_temp.GetCode()),
1164 Location::RegisterLocation(old_value.GetCode()),
1165 Location::RegisterLocation(base.GetCode()),
1166 /*offset=*/ 0u,
1167 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1168 if (update_old_value_) {
1169 update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1170 invoke,
1171 Location::RegisterLocation(old_value.GetCode()),
1172 Location::RegisterLocation(old_value_temp.GetCode()),
1173 Location::RegisterLocation(base.GetCode()),
1174 /*offset=*/ 0u,
1175 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1176 }
1177 }
1178 }
1179
GetDescription() const1180 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
1181
EmitNativeCode(CodeGenerator * codegen)1182 void EmitNativeCode(CodeGenerator* codegen) override {
1183 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1184 Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1185 MacroAssembler* masm = assembler->GetVIXLAssembler();
1186 __ Bind(GetEntryLabel());
1187
1188 // Mark the `old_value_` from the main path and compare with `expected_`.
1189 if (kUseBakerReadBarrier) {
1190 DCHECK(mark_old_value_slow_path_ == nullptr);
1191 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
1192 } else {
1193 DCHECK(mark_old_value_slow_path_ != nullptr);
1194 __ B(mark_old_value_slow_path_->GetEntryLabel());
1195 __ Bind(mark_old_value_slow_path_->GetExitLabel());
1196 }
1197 __ Cmp(old_value_temp_, expected_);
1198 if (update_old_value_) {
1199 // Update the old value if we're going to return from the slow path.
1200 __ Csel(old_value_, old_value_temp_, old_value_, ne);
1201 }
1202 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
1203
1204 // The `old_value` we have read did not match `expected` (which is always a to-space
1205 // reference) but after the read barrier the marked to-space value matched, so the
1206 // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1207 // as the main path but check for both `expected` and the unmarked old value
1208 // representing the to-space and from-space references for the same object.
1209
1210 UseScratchRegisterScope temps(masm);
1211 DCHECK(!store_result_.IsValid() || !temps.IsAvailable(store_result_));
1212 Register tmp_ptr = temps.AcquireX();
1213 Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
1214
1215 // Recalculate the `tmp_ptr` from main path clobbered by the read barrier above.
1216 __ Add(tmp_ptr, base_.X(), Operand(offset_));
1217
1218 vixl::aarch64::Label mark_old_value;
1219 GenerateCompareAndSet(arm64_codegen,
1220 DataType::Type::kReference,
1221 order_,
1222 strong_,
1223 /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1224 tmp_ptr,
1225 new_value_,
1226 /*old_value=*/ old_value_temp_,
1227 store_result,
1228 expected_,
1229 /*expected2=*/ old_value_);
1230 if (update_old_value_) {
1231 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1232 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1233 __ Mov(old_value_, expected_);
1234 }
1235
1236 // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
1237 // For strong CAS, that's the overall success. For weak CAS, the code also needs
1238 // to check the `store_result` after returning from the slow path.
1239 __ B(GetExitLabel());
1240
1241 if (update_old_value_) {
1242 __ Bind(&mark_old_value);
1243 if (kUseBakerReadBarrier) {
1244 DCHECK(update_old_value_slow_path_ == nullptr);
1245 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
1246 } else {
1247 // Note: We could redirect the `failure` above directly to the entry label and bind
1248 // the exit label in the main path, but the main path would need to access the
1249 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1250 DCHECK(update_old_value_slow_path_ != nullptr);
1251 __ B(update_old_value_slow_path_->GetEntryLabel());
1252 __ Bind(update_old_value_slow_path_->GetExitLabel());
1253 }
1254 __ B(GetExitLabel());
1255 }
1256 }
1257
1258 private:
1259 std::memory_order order_;
1260 bool strong_;
1261 Register base_;
1262 Register offset_;
1263 Register expected_;
1264 Register new_value_;
1265 Register old_value_;
1266 Register old_value_temp_;
1267 Register store_result_;
1268 bool update_old_value_;
1269 SlowPathCodeARM64* mark_old_value_slow_path_;
1270 SlowPathCodeARM64* update_old_value_slow_path_;
1271 };
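
// The slow path above retries the compare-and-set accepting either the to-space
// reference (`expected_`) or the unmarked from-space reference (`old_value_`) for
// the same object. The following is a minimal sketch of that "CAS with two
// acceptable expected values", written against std::atomic purely for illustration;
// it is excluded from the build and all names in it are hypothetical.
#if 0
static bool CasAcceptingTwoExpected(std::atomic<uint32_t>* addr,
                                    uint32_t expected_to_space,
                                    uint32_t expected_from_space,
                                    uint32_t new_value) {
  uint32_t current = addr->load(std::memory_order_relaxed);
  while (current == expected_to_space || current == expected_from_space) {
    if (addr->compare_exchange_weak(current, new_value, std::memory_order_seq_cst)) {
      return true;  // Comparison and store both succeeded.
    }
    // `current` was refreshed by compare_exchange_weak; re-check against both references.
  }
  return false;  // The field holds neither representation of the expected object.
}
#endif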
1272
1273 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1274 MacroAssembler* masm = codegen->GetVIXLAssembler();
1275 LocationSummary* locations = invoke->GetLocations();
1276
1277 Register out = WRegisterFrom(locations->Out()); // Boolean result.
1278 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
1279 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
1280 Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
1281 Register new_value = RegisterFrom(locations->InAt(4), type); // New value.
1282
1283 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1284 if (type == DataType::Type::kReference) {
1285 // Mark card for object assuming new value is stored.
1286 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
1287 codegen->MarkGCCard(base, new_value, new_value_can_be_null);
1288 }
1289
1290 UseScratchRegisterScope temps(masm);
1291 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
1292 Register old_value; // Value in memory.
1293
1294 vixl::aarch64::Label exit_loop_label;
1295 vixl::aarch64::Label* exit_loop = &exit_loop_label;
1296 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
1297
1298 if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
1299 // We need to store the `old_value` in a non-scratch register to make sure
1300 // the read barrier in the slow path does not clobber it.
1301 old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
1302 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1303 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1304 Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1305 ReadBarrierCasSlowPathARM64* slow_path =
1306 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1307 invoke,
1308 std::memory_order_seq_cst,
1309 /*strong=*/ true,
1310 base,
1311 offset,
1312 expected,
1313 new_value,
1314 old_value,
1315 old_value_temp,
1316 /*store_result=*/ Register(), // Use a scratch register.
1317 /*update_old_value=*/ false,
1318 codegen);
1319 codegen->AddSlowPath(slow_path);
1320 exit_loop = slow_path->GetExitLabel();
1321 cmp_failure = slow_path->GetEntryLabel();
1322 } else {
1323 old_value = temps.AcquireSameSizeAs(new_value);
1324 }
1325
1326 __ Add(tmp_ptr, base.X(), Operand(offset));
1327
1328 GenerateCompareAndSet(codegen,
1329 type,
1330 std::memory_order_seq_cst,
1331 /*strong=*/ true,
1332 cmp_failure,
1333 tmp_ptr,
1334 new_value,
1335 old_value,
1336 /*store_result=*/ old_value.W(), // Reuse `old_value` for ST*XR* result.
1337 expected);
1338 __ Bind(exit_loop);
1339 __ Cset(out, eq);
1340 }
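
// Ignoring the GC card marking and the read barrier slow path, the loop emitted by
// GenUnsafeCas() implements a sequentially consistent strong compare-and-set. A
// minimal scalar sketch of that contract, not part of the build (assumes <atomic>
// and <cstdint>; the helper name is hypothetical):
#if 0
static bool UnsafeCasInt32Model(std::atomic<int32_t>* field,
                                int32_t expected,
                                int32_t new_value) {
  // Returns true only if the field held `expected` and was updated to `new_value`,
  // matching the Cset(out, eq) that produces the boolean result above.
  return field->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}
#endif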
1341
1342 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1343 CreateUnsafeCASLocations(allocator_, invoke);
1344 }
1345 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1346 CreateUnsafeCASLocations(allocator_, invoke);
1347 }
1348 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1349 // The only read barrier implementation supporting the
1350 // UnsafeCASObject intrinsic is the Baker-style read barrier. b/173104084
1351 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1352 return;
1353 }
1354
1355 CreateUnsafeCASLocations(allocator_, invoke);
1356 if (kEmitCompilerReadBarrier) {
1357 // We need two non-scratch temporary registers for read barrier.
1358 LocationSummary* locations = invoke->GetLocations();
1359 if (kUseBakerReadBarrier) {
1360 locations->AddTemp(Location::RequiresRegister());
1361 locations->AddTemp(Location::RequiresRegister());
1362 } else {
1363 // To preserve the old value across the non-Baker read barrier
1364 // slow path, use a fixed callee-save register.
1365 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1366 locations->AddTemp(Location::RegisterLocation(first_callee_save));
1367 // To reduce the number of moves, request x0 as the second temporary.
1368 DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1369 Location::RegisterLocation(x0.GetCode())));
1370 locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1371 }
1372 }
1373 }
1374
1375 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1376 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1377 }
1378 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1379 GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1380 }
1381 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1382 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
1383 }
1384
1385 enum class GetAndUpdateOp {
1386 kSet,
1387 kAdd,
1388 kAddWithByteSwap,
1389 kAnd,
1390 kOr,
1391 kXor
1392 };
1393
1394 static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1395 GetAndUpdateOp get_and_update_op,
1396 DataType::Type load_store_type,
1397 std::memory_order order,
1398 Register ptr,
1399 CPURegister arg,
1400 CPURegister old_value) {
1401 MacroAssembler* masm = codegen->GetVIXLAssembler();
1402 UseScratchRegisterScope temps(masm);
1403 Register store_result = temps.AcquireW();
1404
1405 DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1406 Register old_value_reg;
1407 Register new_value;
1408 switch (get_and_update_op) {
1409 case GetAndUpdateOp::kSet:
1410 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1411 new_value = arg.IsX() ? arg.X() : arg.W();
1412 break;
1413 case GetAndUpdateOp::kAddWithByteSwap:
1414 case GetAndUpdateOp::kAdd:
1415 if (arg.IsVRegister()) {
1416 old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1417 new_value = old_value_reg; // Use the same temporary.
1418 break;
1419 }
1420 FALLTHROUGH_INTENDED;
1421 case GetAndUpdateOp::kAnd:
1422 case GetAndUpdateOp::kOr:
1423 case GetAndUpdateOp::kXor:
1424 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1425 new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
1426 break;
1427 }
1428
1429 bool use_load_acquire =
1430 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1431 bool use_store_release =
1432 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1433 DCHECK(use_load_acquire || use_store_release);
1434
1435 vixl::aarch64::Label loop_label;
1436 __ Bind(&loop_label);
1437 EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1438 switch (get_and_update_op) {
1439 case GetAndUpdateOp::kSet:
1440 break;
1441 case GetAndUpdateOp::kAddWithByteSwap:
1442 // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1443 // instead of `kInt16` and do the sign-extension explicitly afterwards.
1444 DCHECK_NE(load_store_type, DataType::Type::kInt16);
1445 GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1446 FALLTHROUGH_INTENDED;
1447 case GetAndUpdateOp::kAdd:
1448 if (arg.IsVRegister()) {
1449 VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1450 VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1451 __ Fmov(old_value_vreg, old_value_reg);
1452 __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1453 __ Fmov(new_value, sum);
1454 } else {
1455 __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1456 }
1457 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1458 GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1459 }
1460 break;
1461 case GetAndUpdateOp::kAnd:
1462 __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1463 break;
1464 case GetAndUpdateOp::kOr:
1465 __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1466 break;
1467 case GetAndUpdateOp::kXor:
1468 __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1469 break;
1470 }
1471 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
1472 __ Cbnz(store_result, &loop_label);
1473 }
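
// The order handling above maps std::memory_order to the exclusive load/store pair:
// acquire and seq_cst use a load-acquire (LDAXR), release and seq_cst use a
// store-release (STLXR), and the CBNZ retries on a failed store-exclusive. As a
// sketch only, the kAdd case on an integer location is equivalent to a plain
// std::atomic fetch_add with the requested order (illustrative, not built; the
// helper name is hypothetical):
#if 0
static int32_t GetAndAddInt32Model(std::atomic<int32_t>* field,
                                   int32_t arg,
                                   std::memory_order order) {
  // fetch_add already expresses the load-exclusive / add / store-exclusive retry loop.
  return field->fetch_add(arg, order);
}
#endif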
1474
1475 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1476 LocationSummary* locations =
1477 new (allocator_) LocationSummary(invoke,
1478 invoke->InputAt(1)->CanBeNull()
1479 ? LocationSummary::kCallOnSlowPath
1480 : LocationSummary::kNoCall,
1481 kIntrinsified);
1482 locations->SetInAt(0, Location::RequiresRegister());
1483 locations->SetInAt(1, Location::RequiresRegister());
1484 locations->AddTemp(Location::RequiresRegister());
1485 locations->AddTemp(Location::RequiresRegister());
1486 locations->AddTemp(Location::RequiresRegister());
1487 // Need a temporary register for the String compression feature.
1488 if (mirror::kUseStringCompression) {
1489 locations->AddTemp(Location::RequiresRegister());
1490 }
1491 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1492 }
1493
1494 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1495 MacroAssembler* masm = GetVIXLAssembler();
1496 LocationSummary* locations = invoke->GetLocations();
1497
1498 Register str = InputRegisterAt(invoke, 0);
1499 Register arg = InputRegisterAt(invoke, 1);
1500 DCHECK(str.IsW());
1501 DCHECK(arg.IsW());
1502 Register out = OutputRegister(invoke);
1503
1504 Register temp0 = WRegisterFrom(locations->GetTemp(0));
1505 Register temp1 = WRegisterFrom(locations->GetTemp(1));
1506 Register temp2 = WRegisterFrom(locations->GetTemp(2));
1507 Register temp3;
1508 if (mirror::kUseStringCompression) {
1509 temp3 = WRegisterFrom(locations->GetTemp(3));
1510 }
1511
1512 vixl::aarch64::Label loop;
1513 vixl::aarch64::Label find_char_diff;
1514 vixl::aarch64::Label end;
1515 vixl::aarch64::Label different_compression;
1516
1517 // Get offsets of count and value fields within a string object.
1518 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1519 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1520
1521 // Note that the null check must have been done earlier.
1522 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1523
1524 // Take slow path and throw if input can be and is null.
1525 SlowPathCodeARM64* slow_path = nullptr;
1526 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1527 if (can_slow_path) {
1528 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1529 codegen_->AddSlowPath(slow_path);
1530 __ Cbz(arg, slow_path->GetEntryLabel());
1531 }
1532
1533 // Reference equality check, return 0 if same reference.
1534 __ Subs(out, str, arg);
1535 __ B(&end, eq);
1536
1537 if (mirror::kUseStringCompression) {
1538 // Load `count` fields of this and argument strings.
1539 __ Ldr(temp3, HeapOperand(str, count_offset));
1540 __ Ldr(temp2, HeapOperand(arg, count_offset));
1541 // Clean out compression flag from lengths.
1542 __ Lsr(temp0, temp3, 1u);
1543 __ Lsr(temp1, temp2, 1u);
1544 } else {
1545 // Load lengths of this and argument strings.
1546 __ Ldr(temp0, HeapOperand(str, count_offset));
1547 __ Ldr(temp1, HeapOperand(arg, count_offset));
1548 }
1549 // out = length diff.
1550 __ Subs(out, temp0, temp1);
1551 // temp0 = min(len(str), len(arg)).
1552 __ Csel(temp0, temp1, temp0, ge);
1553 // Shorter string is empty?
1554 __ Cbz(temp0, &end);
1555
1556 if (mirror::kUseStringCompression) {
1557 // Check if both strings use the same compression style, so that this comparison loop can be used.
1558 __ Eor(temp2, temp2, Operand(temp3));
1559 // Interleave with the compression flag extraction, which is needed for both paths,
1560 // and also set flags, which are needed only for the different-compression path.
1561 __ Ands(temp3.W(), temp3.W(), Operand(1));
1562 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
1563 }
1564 // Store offset of string value in preparation for comparison loop.
1565 __ Mov(temp1, value_offset);
1566 if (mirror::kUseStringCompression) {
1567 // For string compression, calculate the number of bytes to compare (not chars).
1568 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1569 __ Lsl(temp0, temp0, temp3);
1570 }
1571
1572 UseScratchRegisterScope scratch_scope(masm);
1573 Register temp4 = scratch_scope.AcquireX();
1574
1575 // Assertions that must hold in order to compare strings 8 bytes at a time.
1576 DCHECK_ALIGNED(value_offset, 8);
1577 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1578
1579 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1580 DCHECK_EQ(char_size, 2u);
1581
1582 // Promote temp2 to an X reg, ready for LDR.
1583 temp2 = temp2.X();
1584
1585 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1586 __ Bind(&loop);
1587 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1588 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1589 __ Cmp(temp4, temp2);
1590 __ B(ne, &find_char_diff);
1591 __ Add(temp1, temp1, char_size * 4);
1592 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1593 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1594 __ B(&loop, hi);
1595 __ B(&end);
1596
1597 // Promote temp1 to an X reg, ready for EOR.
1598 temp1 = temp1.X();
1599
1600 // Find the single character difference.
1601 __ Bind(&find_char_diff);
1602 // Get the bit position of the first character that differs.
1603 __ Eor(temp1, temp2, temp4);
1604 __ Rbit(temp1, temp1);
1605 __ Clz(temp1, temp1);
1606
1607 // If the number of chars remaining <= the index where the difference occurs (0-3), then
1608 // the difference occurs outside the remaining string data, so just return length diff (out).
1609 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1610 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1611 // unsigned when string compression is disabled.
1612 // When it's enabled, the comparison must be unsigned.
1613 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1614 __ B(ls, &end);
1615
1616 // Extract the characters and calculate the difference.
1617 if (mirror::kUseStringCompression) {
1618 __ Bic(temp1, temp1, 0x7);
1619 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1620 } else {
1621 __ Bic(temp1, temp1, 0xf);
1622 }
1623 __ Lsr(temp2, temp2, temp1);
1624 __ Lsr(temp4, temp4, temp1);
1625 if (mirror::kUseStringCompression) {
1626 // Prioritize the case of compressed strings and calculate such result first.
1627 __ Uxtb(temp1, temp4);
1628 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1629 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
1630 }
1631 __ Uxth(temp4, temp4);
1632 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1633
1634 if (mirror::kUseStringCompression) {
1635 __ B(&end);
1636 __ Bind(&different_compression);
1637
1638 // Comparison for different compression style.
1639 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1640 DCHECK_EQ(c_char_size, 1u);
1641 temp1 = temp1.W();
1642 temp2 = temp2.W();
1643 temp4 = temp4.W();
1644
1645 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1646 // Note that flags have been set by the `str` compression flag extraction to `temp3`
1647 // before branching to the `different_compression` label.
1648 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
1649 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
1650
1651 // We want to free up the temp3, currently holding `str` compression flag, for comparison.
1652 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
1653 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
1654 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1655 __ Lsl(temp0, temp0, 1u);
1656
1657 // Adjust temp1 and temp2 from string pointers to data pointers.
1658 __ Add(temp1, temp1, Operand(value_offset));
1659 __ Add(temp2, temp2, Operand(value_offset));
1660
1661 // Complete the move of the compression flag.
1662 __ Sub(temp0, temp0, Operand(temp3));
1663
1664 vixl::aarch64::Label different_compression_loop;
1665 vixl::aarch64::Label different_compression_diff;
1666
1667 __ Bind(&different_compression_loop);
1668 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1669 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1670 __ Subs(temp4, temp4, Operand(temp3));
1671 __ B(&different_compression_diff, ne);
1672 __ Subs(temp0, temp0, 2);
1673 __ B(&different_compression_loop, hi);
1674 __ B(&end);
1675
1676 // Calculate the difference.
1677 __ Bind(&different_compression_diff);
1678 __ Tst(temp0, Operand(1));
1679 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1680 "Expecting 0=compressed, 1=uncompressed");
1681 __ Cneg(out, temp4, ne);
1682 }
1683
1684 __ Bind(&end);
1685
1686 if (can_slow_path) {
1687 __ Bind(slow_path->GetExitLabel());
1688 }
1689 }
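
// A scalar sketch of the EOR+RBIT+CLZ trick used above to locate the first differing
// character inside an 8-byte chunk: RBIT+CLZ of the XOR is a count-trailing-zeros,
// and rounding that bit index down to a character boundary isolates the differing
// character (16-bit units shown; the compressed path uses 8-bit units). Illustrative
// only, not built; assumes a little-endian chunk load and a GCC/Clang CTZ builtin.
#if 0
static int32_t FirstDifferingCharDiff(uint64_t lhs_chunk, uint64_t rhs_chunk) {
  uint64_t diff = lhs_chunk ^ rhs_chunk;  // Precondition: the chunks differ.
  int bit = __builtin_ctzll(diff);        // Same result as RBIT followed by CLZ.
  bit &= ~0xf;                            // Round down to a 16-bit character boundary.
  uint16_t lhs_char = static_cast<uint16_t>(lhs_chunk >> bit);
  uint16_t rhs_char = static_cast<uint16_t>(rhs_chunk >> bit);
  return static_cast<int32_t>(lhs_char) - static_cast<int32_t>(rhs_char);
}
#endif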
1690
1691 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
1692 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1693 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1694 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1695 // to 10 instructions for the unrolled loop.
1696 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
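
// Rough instruction-count model behind the 32-byte cutoff above: each 16-byte step
// costs 5 instructions (LDP+LDP+CMP+CCMP+BNE) and a trailing 8-byte step costs 4
// (LDR+LDR+CMP+BNE), so 32 bytes exactly fill the 10-instruction budget. Sketch
// only, not built:
#if 0
constexpr size_t UnrolledCompareInsns(size_t bytes) {
  return (bytes / 16u) * 5u + ((bytes % 16u) != 0u ? 4u : 0u);
}
static_assert(UnrolledCompareInsns(32u) == 10u, "32 bytes fill the 10-instruction budget");
static_assert(UnrolledCompareInsns(48u) > 10u, "larger copies fall back to the loop");
#endif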
1697
1698 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1699 if (candidate->IsLoadString()) {
1700 HLoadString* load_string = candidate->AsLoadString();
1701 const DexFile& dex_file = load_string->GetDexFile();
1702 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1703 }
1704 return nullptr;
1705 }
1706
1707 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1708 LocationSummary* locations =
1709 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1710 locations->SetInAt(0, Location::RequiresRegister());
1711 locations->SetInAt(1, Location::RequiresRegister());
1712
1713 // For the generic implementation and for long const strings we need a temporary.
1714 // We do not need it for short const strings, up to 8 bytes, see code generation below.
1715 uint32_t const_string_length = 0u;
1716 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1717 if (const_string == nullptr) {
1718 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1719 }
1720 bool is_compressed =
1721 mirror::kUseStringCompression &&
1722 const_string != nullptr &&
1723 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1724 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1725 locations->AddTemp(Location::RequiresRegister());
1726 }
1727
1728 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1729 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1730 // Then we shall need an extra temporary register instead of the output register.
1731 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1732 }
1733
1734 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1735 MacroAssembler* masm = GetVIXLAssembler();
1736 LocationSummary* locations = invoke->GetLocations();
1737
1738 Register str = WRegisterFrom(locations->InAt(0));
1739 Register arg = WRegisterFrom(locations->InAt(1));
1740 Register out = XRegisterFrom(locations->Out());
1741
1742 UseScratchRegisterScope scratch_scope(masm);
1743 Register temp = scratch_scope.AcquireW();
1744 Register temp1 = scratch_scope.AcquireW();
1745
1746 vixl::aarch64::Label loop;
1747 vixl::aarch64::Label end;
1748 vixl::aarch64::Label return_true;
1749 vixl::aarch64::Label return_false;
1750
1751 // Get offsets of count, value, and class fields within a string object.
1752 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1753 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1754 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1755
1756 // Note that the null check must have been done earlier.
1757 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1758
1759 StringEqualsOptimizations optimizations(invoke);
1760 if (!optimizations.GetArgumentNotNull()) {
1761 // Check if input is null, return false if it is.
1762 __ Cbz(arg, &return_false);
1763 }
1764
1765 // Reference equality check, return true if same reference.
1766 __ Cmp(str, arg);
1767 __ B(&return_true, eq);
1768
1769 if (!optimizations.GetArgumentIsString()) {
1770 // Instanceof check for the argument by comparing class fields.
1771 // All string objects must have the same type since String cannot be subclassed.
1772 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1773 // If the argument is a string object, its class field must be equal to receiver's class field.
1774 //
1775 // As the String class is expected to be non-movable, we can read the class
1776 // field from String.equals' arguments without read barriers.
1777 AssertNonMovableStringClass();
1778 // /* HeapReference<Class> */ temp = str->klass_
1779 __ Ldr(temp, MemOperand(str.X(), class_offset));
1780 // /* HeapReference<Class> */ temp1 = arg->klass_
1781 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1782 // Also, because we use the previously loaded class references only in the
1783 // following comparison, we don't need to unpoison them.
1784 __ Cmp(temp, temp1);
1785 __ B(&return_false, ne);
1786 }
1787
1788 // Check if one of the inputs is a const string. Do not special-case both strings
1789 // being const, such cases should be handled by constant folding if needed.
1790 uint32_t const_string_length = 0u;
1791 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1792 if (const_string == nullptr) {
1793 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1794 if (const_string != nullptr) {
1795 std::swap(str, arg); // Make sure the const string is in `str`.
1796 }
1797 }
1798 bool is_compressed =
1799 mirror::kUseStringCompression &&
1800 const_string != nullptr &&
1801 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1802
1803 if (const_string != nullptr) {
1804 // Load `count` field of the argument string and check if it matches the const string.
1805 // This also compares the compression style; if it differs, return false.
1806 __ Ldr(temp, MemOperand(arg.X(), count_offset));
1807 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1808 scratch_scope.Release(temp1);
1809 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1810 temp1 = scratch_scope.AcquireW();
1811 __ B(&return_false, ne);
1812 } else {
1813 // Load `count` fields of this and argument strings.
1814 __ Ldr(temp, MemOperand(str.X(), count_offset));
1815 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1816 // Check if the `count` fields are equal; return false if they're not.
1817 // This also compares the compression style; if it differs, return false.
1818 __ Cmp(temp, temp1);
1819 __ B(&return_false, ne);
1820 }
1821
1822 // Assertions that must hold in order to compare strings 8 bytes at a time.
1823 // Ok to do this because strings are zero-padded to kObjectAlignment.
1824 DCHECK_ALIGNED(value_offset, 8);
1825 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1826
1827 if (const_string != nullptr &&
1828 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1829 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1830 // Load and compare the contents. Though we know the contents of the short const string
1831 // at compile time, materializing constants may be more code than loading from memory.
1832 int32_t offset = value_offset;
1833 size_t remaining_bytes =
1834 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1835 temp = temp.X();
1836 temp1 = temp1.X();
1837 while (remaining_bytes > sizeof(uint64_t)) {
1838 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1839 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1840 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1841 __ Cmp(temp, temp2);
1842 __ Ccmp(temp1, out, NoFlag, eq);
1843 __ B(&return_false, ne);
1844 offset += 2u * sizeof(uint64_t);
1845 remaining_bytes -= 2u * sizeof(uint64_t);
1846 }
1847 if (remaining_bytes != 0u) {
1848 __ Ldr(temp, MemOperand(str.X(), offset));
1849 __ Ldr(temp1, MemOperand(arg.X(), offset));
1850 __ Cmp(temp, temp1);
1851 __ B(&return_false, ne);
1852 }
1853 } else {
1854 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1855 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1856 "Expecting 0=compressed, 1=uncompressed");
1857 __ Cbz(temp, &return_true);
1858
1859 if (mirror::kUseStringCompression) {
1860 // For string compression, calculate the number of bytes to compare (not chars).
1861 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1862 __ And(temp1, temp, Operand(1)); // Extract compression flag.
1863 __ Lsr(temp, temp, 1u); // Extract length.
1864 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
1865 }
1866
1867 // Store offset of string value in preparation for comparison loop
1868 __ Mov(temp1, value_offset);
1869
1870 temp1 = temp1.X();
1871 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1872 // Loop to compare strings 8 bytes at a time starting at the front of the string.
1873 __ Bind(&loop);
1874 __ Ldr(out, MemOperand(str.X(), temp1));
1875 __ Ldr(temp2, MemOperand(arg.X(), temp1));
1876 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1877 __ Cmp(out, temp2);
1878 __ B(&return_false, ne);
1879 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1880 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1881 __ B(&loop, hi);
1882 }
1883
1884 // Return true and exit the function.
1885 // If loop does not result in returning false, we return true.
1886 __ Bind(&return_true);
1887 __ Mov(out, 1);
1888 __ B(&end);
1889
1890 // Return false and exit the function.
1891 __ Bind(&return_false);
1892 __ Mov(out, 0);
1893 __ Bind(&end);
1894 }
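
// The `count` comparisons above rely on String packing its length together with the
// compression flag: bit 0 is the flag (0 = compressed, per the static_assert above)
// and the upper bits hold the UTF-16 length. A sketch of that encoding and of the
// Lsl-based byte-count computation, mirroring mirror::String::GetFlaggedCount() as
// used above; treat the exact layout here as an assumption for illustration only
// (not built):
#if 0
static uint32_t FlaggedCountModel(uint32_t utf16_length, bool compressed) {
  return (utf16_length << 1) | (compressed ? 0u : 1u);
}
static uint32_t LengthFromFlaggedCount(uint32_t count) {
  return count >> 1;  // Matches the Lsr(temp, temp, 1u) above.
}
static uint32_t BytesToCompare(uint32_t count) {
  // Matches Lsl(length, flag): compressed strings compare 1 byte per character,
  // uncompressed strings compare 2 bytes per character.
  return (count >> 1) << (count & 1u);
}
#endif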
1895
1896 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1897 MacroAssembler* masm,
1898 CodeGeneratorARM64* codegen,
1899 bool start_at_zero) {
1900 LocationSummary* locations = invoke->GetLocations();
1901
1902 // Note that the null check must have been done earlier.
1903 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1904
1905 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1906 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1907 SlowPathCodeARM64* slow_path = nullptr;
1908 HInstruction* code_point = invoke->InputAt(1);
1909 if (code_point->IsIntConstant()) {
1910 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1911 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1912 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1913 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1914 codegen->AddSlowPath(slow_path);
1915 __ B(slow_path->GetEntryLabel());
1916 __ Bind(slow_path->GetExitLabel());
1917 return;
1918 }
1919 } else if (code_point->GetType() != DataType::Type::kUint16) {
1920 Register char_reg = WRegisterFrom(locations->InAt(1));
1921 __ Tst(char_reg, 0xFFFF0000);
1922 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1923 codegen->AddSlowPath(slow_path);
1924 __ B(ne, slow_path->GetEntryLabel());
1925 }
1926
1927 if (start_at_zero) {
1928 // Start-index = 0.
1929 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1930 __ Mov(tmp_reg, 0);
1931 }
1932
1933 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1934 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1935
1936 if (slow_path != nullptr) {
1937 __ Bind(slow_path->GetExitLabel());
1938 }
1939 }
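
// The slow-path condition above in plain terms: String.indexOf(int) is only inlined
// for code points that fit in a single UTF-16 unit; supplementary code points
// (> 0xFFFF) would require a surrogate-pair search, so they are delegated to the
// runtime. Sketch only, not built:
#if 0
static bool NeedsIndexOfSlowPath(uint32_t code_point) {
  return code_point > 0xFFFFu;  // Same condition as the TST with 0xFFFF0000 above.
}
#endif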
1940
1941 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1942 LocationSummary* locations = new (allocator_) LocationSummary(
1943 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1944 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1945 // best to align the inputs accordingly.
1946 InvokeRuntimeCallingConvention calling_convention;
1947 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1948 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1949 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1950
1951 // Need to send start_index=0.
1952 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1953 }
1954
1955 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1956 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
1957 }
1958
1959 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1960 LocationSummary* locations = new (allocator_) LocationSummary(
1961 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1962 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1963 // best to align the inputs accordingly.
1964 InvokeRuntimeCallingConvention calling_convention;
1965 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1966 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1967 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1968 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1969 }
1970
1971 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1972 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
1973 }
1974
1975 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1976 LocationSummary* locations = new (allocator_) LocationSummary(
1977 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1978 InvokeRuntimeCallingConvention calling_convention;
1979 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1980 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1981 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1982 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1983 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1984 }
1985
1986 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1987 MacroAssembler* masm = GetVIXLAssembler();
1988 LocationSummary* locations = invoke->GetLocations();
1989
1990 Register byte_array = WRegisterFrom(locations->InAt(0));
1991 __ Cmp(byte_array, 0);
1992 SlowPathCodeARM64* slow_path =
1993 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1994 codegen_->AddSlowPath(slow_path);
1995 __ B(eq, slow_path->GetEntryLabel());
1996
1997 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1998 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1999 __ Bind(slow_path->GetExitLabel());
2000 }
2001
2002 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2003 LocationSummary* locations =
2004 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2005 InvokeRuntimeCallingConvention calling_convention;
2006 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2007 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2008 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2009 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2010 }
2011
2012 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
2013 // No need to emit code checking whether `locations->InAt(2)` is a null
2014 // pointer, as callers of the native method
2015 //
2016 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2017 //
2018 // all include a null check on `data` before calling that method.
2019 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2020 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2021 }
2022
2023 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2024 LocationSummary* locations = new (allocator_) LocationSummary(
2025 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2026 InvokeRuntimeCallingConvention calling_convention;
2027 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2028 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
2029 }
2030
2031 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
2032 MacroAssembler* masm = GetVIXLAssembler();
2033 LocationSummary* locations = invoke->GetLocations();
2034
2035 Register string_to_copy = WRegisterFrom(locations->InAt(0));
2036 __ Cmp(string_to_copy, 0);
2037 SlowPathCodeARM64* slow_path =
2038 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2039 codegen_->AddSlowPath(slow_path);
2040 __ B(eq, slow_path->GetEntryLabel());
2041
2042 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2043 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2044 __ Bind(slow_path->GetExitLabel());
2045 }
2046
2047 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2048 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2049 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2050 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2051
2052 LocationSummary* const locations =
2053 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2054 InvokeRuntimeCallingConvention calling_convention;
2055
2056 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2057 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2058 }
2059
2060 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2061 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2062 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2063 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2064 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2065
2066 LocationSummary* const locations =
2067 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2068 InvokeRuntimeCallingConvention calling_convention;
2069
2070 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2071 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2072 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2073 }
2074
2075 static void GenFPToFPCall(HInvoke* invoke,
2076 CodeGeneratorARM64* codegen,
2077 QuickEntrypointEnum entry) {
2078 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2079 }
2080
2081 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
2082 CreateFPToFPCallLocations(allocator_, invoke);
2083 }
2084
2085 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
2086 GenFPToFPCall(invoke, codegen_, kQuickCos);
2087 }
2088
2089 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
2090 CreateFPToFPCallLocations(allocator_, invoke);
2091 }
2092
2093 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
2094 GenFPToFPCall(invoke, codegen_, kQuickSin);
2095 }
2096
2097 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
2098 CreateFPToFPCallLocations(allocator_, invoke);
2099 }
2100
2101 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
2102 GenFPToFPCall(invoke, codegen_, kQuickAcos);
2103 }
2104
2105 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
2106 CreateFPToFPCallLocations(allocator_, invoke);
2107 }
2108
2109 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
2110 GenFPToFPCall(invoke, codegen_, kQuickAsin);
2111 }
2112
2113 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
2114 CreateFPToFPCallLocations(allocator_, invoke);
2115 }
2116
2117 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
2118 GenFPToFPCall(invoke, codegen_, kQuickAtan);
2119 }
2120
2121 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
2122 CreateFPToFPCallLocations(allocator_, invoke);
2123 }
2124
2125 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
2126 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
2127 }
2128
2129 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
2130 CreateFPToFPCallLocations(allocator_, invoke);
2131 }
2132
2133 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
2134 GenFPToFPCall(invoke, codegen_, kQuickCosh);
2135 }
2136
2137 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
2138 CreateFPToFPCallLocations(allocator_, invoke);
2139 }
2140
2141 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
2142 GenFPToFPCall(invoke, codegen_, kQuickExp);
2143 }
2144
2145 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
2146 CreateFPToFPCallLocations(allocator_, invoke);
2147 }
2148
2149 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
2150 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
2151 }
2152
2153 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
2154 CreateFPToFPCallLocations(allocator_, invoke);
2155 }
2156
2157 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
2158 GenFPToFPCall(invoke, codegen_, kQuickLog);
2159 }
2160
2161 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
2162 CreateFPToFPCallLocations(allocator_, invoke);
2163 }
2164
2165 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
2166 GenFPToFPCall(invoke, codegen_, kQuickLog10);
2167 }
2168
2169 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
2170 CreateFPToFPCallLocations(allocator_, invoke);
2171 }
2172
2173 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
2174 GenFPToFPCall(invoke, codegen_, kQuickSinh);
2175 }
2176
2177 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
2178 CreateFPToFPCallLocations(allocator_, invoke);
2179 }
2180
2181 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
2182 GenFPToFPCall(invoke, codegen_, kQuickTan);
2183 }
2184
2185 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
2186 CreateFPToFPCallLocations(allocator_, invoke);
2187 }
2188
2189 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
2190 GenFPToFPCall(invoke, codegen_, kQuickTanh);
2191 }
2192
2193 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
2194 CreateFPFPToFPCallLocations(allocator_, invoke);
2195 }
2196
2197 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
2198 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
2199 }
2200
2201 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2202 CreateFPFPToFPCallLocations(allocator_, invoke);
2203 }
2204
2205 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2206 GenFPToFPCall(invoke, codegen_, kQuickPow);
2207 }
2208
2209 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
2210 CreateFPFPToFPCallLocations(allocator_, invoke);
2211 }
2212
2213 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
2214 GenFPToFPCall(invoke, codegen_, kQuickHypot);
2215 }
2216
2217 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
2218 CreateFPFPToFPCallLocations(allocator_, invoke);
2219 }
2220
2221 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
2222 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
2223 }
2224
2225 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2226 LocationSummary* locations =
2227 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2228 locations->SetInAt(0, Location::RequiresRegister());
2229 locations->SetInAt(1, Location::RequiresRegister());
2230 locations->SetInAt(2, Location::RequiresRegister());
2231 locations->SetInAt(3, Location::RequiresRegister());
2232 locations->SetInAt(4, Location::RequiresRegister());
2233
2234 locations->AddTemp(Location::RequiresRegister());
2235 locations->AddTemp(Location::RequiresRegister());
2236 locations->AddTemp(Location::RequiresRegister());
2237 }
2238
2239 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2240 MacroAssembler* masm = GetVIXLAssembler();
2241 LocationSummary* locations = invoke->GetLocations();
2242
2243 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2244 const size_t char_size = DataType::Size(DataType::Type::kUint16);
2245 DCHECK_EQ(char_size, 2u);
2246
2247 // Location of data in char array buffer.
2248 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2249
2250 // Location of char array data in string.
2251 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2252
2253 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2254 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2255 Register srcObj = XRegisterFrom(locations->InAt(0));
2256 Register srcBegin = XRegisterFrom(locations->InAt(1));
2257 Register srcEnd = XRegisterFrom(locations->InAt(2));
2258 Register dstObj = XRegisterFrom(locations->InAt(3));
2259 Register dstBegin = XRegisterFrom(locations->InAt(4));
2260
2261 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2262 Register num_chr = XRegisterFrom(locations->GetTemp(1));
2263 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2264
2265 UseScratchRegisterScope temps(masm);
2266 Register dst_ptr = temps.AcquireX();
2267 Register tmp2 = temps.AcquireX();
2268
2269 vixl::aarch64::Label done;
2270 vixl::aarch64::Label compressed_string_vector_loop;
2271 vixl::aarch64::Label compressed_string_remainder;
2272 __ Sub(num_chr, srcEnd, srcBegin);
2273 // Early out for valid zero-length retrievals.
2274 __ Cbz(num_chr, &done);
2275
2276 // dst address start to copy to.
2277 __ Add(dst_ptr, dstObj, Operand(data_offset));
2278 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2279
2280 // src address to copy from.
2281 __ Add(src_ptr, srcObj, Operand(value_offset));
2282 vixl::aarch64::Label compressed_string_preloop;
2283 if (mirror::kUseStringCompression) {
2284 // Location of count in string.
2285 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2286 // String's length.
2287 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2288 __ Tbz(tmp2, 0, &compressed_string_preloop);
2289 }
2290 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2291
2292 // Do the copy.
2293 vixl::aarch64::Label loop;
2294 vixl::aarch64::Label remainder;
2295
2296 // Save repairing the value of num_chr on the < 8 character path.
2297 __ Subs(tmp1, num_chr, 8);
2298 __ B(lt, &remainder);
2299
2300 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2301 __ Mov(num_chr, tmp1);
2302
2303 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
2304 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2305 __ Bind(&loop);
2306 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2307 __ Subs(num_chr, num_chr, 8);
2308 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2309 __ B(ge, &loop);
2310
2311 __ Adds(num_chr, num_chr, 8);
2312 __ B(eq, &done);
2313
2314 // Main loop for < 8 character case and remainder handling. Loads and stores one
2315 // 16-bit Java character at a time.
2316 __ Bind(&remainder);
2317 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2318 __ Subs(num_chr, num_chr, 1);
2319 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2320 __ B(gt, &remainder);
2321 __ B(&done);
2322
2323 if (mirror::kUseStringCompression) {
2324 // For compressed strings, acquire a SIMD temporary register.
2325 VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
2326 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2327 DCHECK_EQ(c_char_size, 1u);
2328 __ Bind(&compressed_string_preloop);
2329 __ Add(src_ptr, src_ptr, Operand(srcBegin));
2330
2331 // Save repairing the value of num_chr on the < 8 character path.
2332 __ Subs(tmp1, num_chr, 8);
2333 __ B(lt, &compressed_string_remainder);
2334
2335 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2336 __ Mov(num_chr, tmp1);
2337
2338 // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time.
2339 // Uses SIMD instructions.
2340 __ Bind(&compressed_string_vector_loop);
2341 __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2342 __ Subs(num_chr, num_chr, 8);
2343 __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2344 __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2345 __ B(ge, &compressed_string_vector_loop);
2346
2347 __ Adds(num_chr, num_chr, 8);
2348 __ B(eq, &done);
2349
2350 // Loop for < 8 character case and remainder handling with a compressed src.
2351 // Copies 1 character (8-bit) to (16-bit) at a time.
2352 __ Bind(&compressed_string_remainder);
2353 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2354 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2355 __ Subs(num_chr, num_chr, Operand(1));
2356 __ B(gt, &compressed_string_remainder);
2357 }
2358
2359 __ Bind(&done);
2360 }
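
// A scalar model of the compressed-string path above: the SIMD loop (LD1 + UXTL + ST1)
// zero-extends eight 8-bit Latin-1 characters to 16-bit UTF-16 units per iteration,
// and the remainder loop does the same one character at a time. Illustrative only,
// not built; names are hypothetical:
#if 0
static void InflateCompressedChars(const uint8_t* src, uint16_t* dst, size_t num_chars) {
  for (size_t i = 0; i != num_chars; ++i) {
    dst[i] = static_cast<uint16_t>(src[i]);  // Zero-extend, as UXTL does.
  }
}
#endif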
2361
2362 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2363 // implementation there for longer copy lengths.
2364 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2365
2366 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2367 uint32_t at,
2368 HInstruction* input) {
2369 HIntConstant* const_input = input->AsIntConstant();
2370 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2371 locations->SetInAt(at, Location::RequiresRegister());
2372 } else {
2373 locations->SetInAt(at, Location::RegisterOrConstant(input));
2374 }
2375 }
2376
2377 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2378 // Check to see if we have known failures that will cause us to have to bail out
2379 // to the runtime, and just generate the runtime call directly.
2380 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2381 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2382
2383 // The positions must be non-negative.
2384 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2385 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2386 // We will have to fail anyway.
2387 return;
2388 }
2389
2390 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2391 // native implementation.
2392 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2393 if (length != nullptr) {
2394 int32_t len = length->GetValue();
2395 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2396 // Just call as normal.
2397 return;
2398 }
2399 }
2400
2401 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2402 LocationSummary* locations =
2403 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2404 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2405 locations->SetInAt(0, Location::RequiresRegister());
2406 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2407 locations->SetInAt(2, Location::RequiresRegister());
2408 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2409 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2410
2411 locations->AddTemp(Location::RequiresRegister());
2412 locations->AddTemp(Location::RequiresRegister());
2413 locations->AddTemp(Location::RequiresRegister());
2414 }
2415
2416 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2417 const Location& pos,
2418 const Register& input,
2419 const Location& length,
2420 SlowPathCodeARM64* slow_path,
2421 const Register& temp,
2422 bool length_is_input_length = false) {
2423 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2424 if (pos.IsConstant()) {
2425 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2426 if (pos_const == 0) {
2427 if (!length_is_input_length) {
2428 // Check that length(input) >= length.
2429 __ Ldr(temp, MemOperand(input, length_offset));
2430 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2431 __ B(slow_path->GetEntryLabel(), lt);
2432 }
2433 } else {
2434 // Check that length(input) >= pos.
2435 __ Ldr(temp, MemOperand(input, length_offset));
2436 __ Subs(temp, temp, pos_const);
2437 __ B(slow_path->GetEntryLabel(), lt);
2438
2439 // Check that (length(input) - pos) >= length.
2440 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2441 __ B(slow_path->GetEntryLabel(), lt);
2442 }
2443 } else if (length_is_input_length) {
2444 // The only way the copy can succeed is if pos is zero.
2445 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2446 } else {
2447 // Check that pos >= 0.
2448 Register pos_reg = WRegisterFrom(pos);
2449 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2450
2451 // Check that pos <= length(input) && (length(input) - pos) >= length.
2452 __ Ldr(temp, MemOperand(input, length_offset));
2453 __ Subs(temp, temp, pos_reg);
2454 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
2455 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
2456 __ B(slow_path->GetEntryLabel(), lt);
2457 }
2458 }
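
// The branches emitted above amount to the following predicate, where returning
// false means "take the slow path"; with `length_is_input_length` the check
// collapses to `pos == 0`. Sketch only, not built:
#if 0
static bool ArrayCopyPositionOk(int32_t pos, int32_t input_length, int32_t copy_length) {
  return pos >= 0 && pos <= input_length && (input_length - pos) >= copy_length;
}
#endif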
2459
2460 // Compute base source address, base destination address, and end
2461 // source address for System.arraycopy* intrinsics in `src_base`,
2462 // `dst_base` and `src_end` respectively.
2463 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2464 DataType::Type type,
2465 const Register& src,
2466 const Location& src_pos,
2467 const Register& dst,
2468 const Location& dst_pos,
2469 const Location& copy_length,
2470 const Register& src_base,
2471 const Register& dst_base,
2472 const Register& src_end) {
2473 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2474 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2475 << "Unexpected element type: " << type;
2476 const int32_t element_size = DataType::Size(type);
2477 const int32_t element_size_shift = DataType::SizeShift(type);
2478 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2479
2480 if (src_pos.IsConstant()) {
2481 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2482 __ Add(src_base, src, element_size * constant + data_offset);
2483 } else {
2484 __ Add(src_base, src, data_offset);
2485 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2486 }
2487
2488 if (dst_pos.IsConstant()) {
2489 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2490 __ Add(dst_base, dst, element_size * constant + data_offset);
2491 } else {
2492 __ Add(dst_base, dst, data_offset);
2493 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2494 }
2495
2496 if (copy_length.IsConstant()) {
2497 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2498 __ Add(src_end, src_base, element_size * constant);
2499 } else {
2500 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2501 }
2502 }
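
// The same address computation in plain C++: both base addresses are offset by the
// array data offset plus the position scaled by the element size, and the source end
// address is the source base plus the scaled copy length. Sketch only, not built;
// the helper name is hypothetical:
#if 0
static void ComputeCopyAddressesModel(uintptr_t src, size_t src_pos,
                                      uintptr_t dst, size_t dst_pos,
                                      size_t copy_length, size_t element_size,
                                      size_t data_offset,
                                      uintptr_t* src_base, uintptr_t* dst_base,
                                      uintptr_t* src_end) {
  *src_base = src + data_offset + src_pos * element_size;
  *dst_base = dst + data_offset + dst_pos * element_size;
  *src_end = *src_base + copy_length * element_size;
}
#endif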
2503
2504 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2505 MacroAssembler* masm = GetVIXLAssembler();
2506 LocationSummary* locations = invoke->GetLocations();
2507 Register src = XRegisterFrom(locations->InAt(0));
2508 Location src_pos = locations->InAt(1);
2509 Register dst = XRegisterFrom(locations->InAt(2));
2510 Location dst_pos = locations->InAt(3);
2511 Location length = locations->InAt(4);
2512
2513 SlowPathCodeARM64* slow_path =
2514 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2515 codegen_->AddSlowPath(slow_path);
2516
2517 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2518 // copied in reverse, and we cannot always tell statically whether that is needed.
2519 __ Cmp(src, dst);
2520 __ B(slow_path->GetEntryLabel(), eq);
2521
2522 // Bail out if the source is null.
2523 __ Cbz(src, slow_path->GetEntryLabel());
2524
2525 // Bail out if the destination is null.
2526 __ Cbz(dst, slow_path->GetEntryLabel());
2527
2528 if (!length.IsConstant()) {
2529 // Merge the following two comparisons into one:
2530 // If the length is negative, bail out (delegate to libcore's native implementation).
2531 // If the length > 32 then (currently) prefer libcore's native implementation.
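// Both checks collapse into one unsigned comparison: a negative length wraps to a large
// unsigned value, so it takes the same slow-path branch as a length above the threshold,
// i.e. if (static_cast<uint32_t>(length) > kSystemArrayCopyCharThreshold) goto slow_path.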
2532 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2533 __ B(slow_path->GetEntryLabel(), hi);
2534 } else {
2535 // We have already checked in the LocationsBuilder for the constant case.
2536 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2537 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2538 }
2539
2540 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2541 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2542 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2543
2544 CheckSystemArrayCopyPosition(masm,
2545 src_pos,
2546 src,
2547 length,
2548 slow_path,
2549 src_curr_addr,
2550 false);
2551
2552 CheckSystemArrayCopyPosition(masm,
2553 dst_pos,
2554 dst,
2555 length,
2556 slow_path,
2557 src_curr_addr,
2558 false);
2559
2560 src_curr_addr = src_curr_addr.X();
2561 dst_curr_addr = dst_curr_addr.X();
2562 src_stop_addr = src_stop_addr.X();
2563
2564 GenSystemArrayCopyAddresses(masm,
2565 DataType::Type::kUint16,
2566 src,
2567 src_pos,
2568 dst,
2569 dst_pos,
2570 length,
2571 src_curr_addr,
2572 dst_curr_addr,
2573 src_stop_addr);
2574
2575 // Iterate over the arrays and do a raw copy of the chars.
2576 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2577 UseScratchRegisterScope temps(masm);
2578 Register tmp = temps.AcquireW();
2579 vixl::aarch64::Label loop, done;
2580 __ Bind(&loop);
2581 __ Cmp(src_curr_addr, src_stop_addr);
2582 __ B(&done, eq);
2583 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2584 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2585 __ B(&loop);
2586 __ Bind(&done);
2587
2588 __ Bind(slow_path->GetExitLabel());
2589 }
2590
2591 // We choose to use the native implementation for longer copy lengths.
2592 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2593
2594 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2595 // We only want two temporaries here, to reduce register pressure on arm64, so we
2596 // do not use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2597 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2598 // The only read barrier implementation supporting the
2599 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2600 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2601 return;
2602 }
2603
2604 // Check to see if we have known failures that will cause us to have to bail out
2605 // to the runtime, and just generate the runtime call directly.
2606 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2607 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2608
2609 // The positions must be non-negative.
2610 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2611 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2612 // We will have to fail anyway.
2613 return;
2614 }
2615
2616 // The length must be >= 0.
2617 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2618 if (length != nullptr) {
2619 int32_t len = length->GetValue();
2620 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2621 // Just call as normal.
2622 return;
2623 }
2624 }
2625
2626 SystemArrayCopyOptimizations optimizations(invoke);
2627
2628 if (optimizations.GetDestinationIsSource()) {
2629 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2630 // When source and destination are the same, we only support copying towards lower positions (src_pos >= dest_pos); otherwise the copy would need to be done in reverse.
2631 return;
2632 }
2633 }
2634
2635 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2636 // We currently don't intrinsify primitive copying.
2637 return;
2638 }
2639
2640 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2641 LocationSummary* locations =
2642 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2643 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2644 locations->SetInAt(0, Location::RequiresRegister());
2645 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2646 locations->SetInAt(2, Location::RequiresRegister());
2647 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2648 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2649
2650 locations->AddTemp(Location::RequiresRegister());
2651 locations->AddTemp(Location::RequiresRegister());
2652 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2653 // Temporary register IP0, obtained from the VIXL scratch register
2654 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2655 // (because that register is clobbered by ReadBarrierMarkRegX
2656 // entry points). It cannot be used in calls to
2657 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2658 // either. For these reasons, get a third extra temporary register
2659 // from the register allocator.
2660 locations->AddTemp(Location::RequiresRegister());
2661 } else {
2662 // Cases other than Baker read barriers: the third temporary will
2663 // be acquired from the VIXL scratch register pool.
2664 }
2665 }
2666
2667 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2668 // The only read barrier implementation supporting the
2669 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2670 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2671
2672 MacroAssembler* masm = GetVIXLAssembler();
2673 LocationSummary* locations = invoke->GetLocations();
2674
2675 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2676 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2677 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2678 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2679 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2680
2681 Register src = XRegisterFrom(locations->InAt(0));
2682 Location src_pos = locations->InAt(1);
2683 Register dest = XRegisterFrom(locations->InAt(2));
2684 Location dest_pos = locations->InAt(3);
2685 Location length = locations->InAt(4);
2686 Register temp1 = WRegisterFrom(locations->GetTemp(0));
2687 Location temp1_loc = LocationFrom(temp1);
2688 Register temp2 = WRegisterFrom(locations->GetTemp(1));
2689 Location temp2_loc = LocationFrom(temp2);
2690
2691 SlowPathCodeARM64* intrinsic_slow_path =
2692 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2693 codegen_->AddSlowPath(intrinsic_slow_path);
2694
2695 vixl::aarch64::Label conditions_on_positions_validated;
2696 SystemArrayCopyOptimizations optimizations(invoke);
2697
2698 // If source and destination are the same, we go to slow path if we need to do
2699 // forward copying.
2700 if (src_pos.IsConstant()) {
2701 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2702 if (dest_pos.IsConstant()) {
2703 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2704 if (optimizations.GetDestinationIsSource()) {
2705 // Checked when building locations.
2706 DCHECK_GE(src_pos_constant, dest_pos_constant);
2707 } else if (src_pos_constant < dest_pos_constant) {
2708 __ Cmp(src, dest);
2709 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2710 }
2711 // Checked when building locations.
2712 DCHECK(!optimizations.GetDestinationIsSource()
2713 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2714 } else {
2715 if (!optimizations.GetDestinationIsSource()) {
2716 __ Cmp(src, dest);
2717 __ B(&conditions_on_positions_validated, ne);
2718 }
2719 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2720 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2721 }
2722 } else {
2723 if (!optimizations.GetDestinationIsSource()) {
2724 __ Cmp(src, dest);
2725 __ B(&conditions_on_positions_validated, ne);
2726 }
2727 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2728 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2729 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2730 }
2731
2732 __ Bind(&conditions_on_positions_validated);
2733
2734 if (!optimizations.GetSourceIsNotNull()) {
2735 // Bail out if the source is null.
2736 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2737 }
2738
2739 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2740 // Bail out if the destination is null.
2741 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2742 }
2743
2744 // We have already checked in the LocationsBuilder for the constant case.
2745 if (!length.IsConstant() &&
2746 !optimizations.GetCountIsSourceLength() &&
2747 !optimizations.GetCountIsDestinationLength()) {
2748 // Merge the following two comparisons into one:
2749 // If the length is negative, bail out (delegate to libcore's native implementation).
2750 // If the length >= 128 then (currently) prefer native implementation.
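// As for SystemArrayCopyChar above, a single unsigned comparison covers both cases,
// since a negative length is seen as a large unsigned value by the `hs` branch below.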
2751 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2752 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2753 }
2754 // Validity checks: source.
2755 CheckSystemArrayCopyPosition(masm,
2756 src_pos,
2757 src,
2758 length,
2759 intrinsic_slow_path,
2760 temp1,
2761 optimizations.GetCountIsSourceLength());
2762
2763 // Validity checks: dest.
2764 CheckSystemArrayCopyPosition(masm,
2765 dest_pos,
2766 dest,
2767 length,
2768 intrinsic_slow_path,
2769 temp1,
2770 optimizations.GetCountIsDestinationLength());
2771 {
2772 // We use a block to end the scratch scope before the write barrier, thus
2773 // freeing the temporary registers so they can be used in `MarkGCCard`.
2774 UseScratchRegisterScope temps(masm);
2775 Location temp3_loc; // Used only for Baker read barrier.
2776 Register temp3;
2777 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2778 temp3_loc = locations->GetTemp(2);
2779 temp3 = WRegisterFrom(temp3_loc);
2780 } else {
2781 temp3 = temps.AcquireW();
2782 }
2783
2784 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2785 // Check whether all elements of the source array are assignable to the component
2786 // type of the destination array. We do two checks: the classes are the same,
2787 // or the destination is Object[]. If none of these checks succeed, we go to the
2788 // slow path.
2789
2790 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2791 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2792 // /* HeapReference<Class> */ temp1 = src->klass_
2793 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2794 temp1_loc,
2795 src.W(),
2796 class_offset,
2797 temp3_loc,
2798 /* needs_null_check= */ false,
2799 /* use_load_acquire= */ false);
2800 // Bail out if the source is not a non primitive array.
2801 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2802 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2803 temp1_loc,
2804 temp1,
2805 component_offset,
2806 temp3_loc,
2807 /* needs_null_check= */ false,
2808 /* use_load_acquire= */ false);
2809 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2810 // If heap poisoning is enabled, `temp1` has been unpoisoned
2811 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2812 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2813 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2814 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2815 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2816 }
2817
2818 // /* HeapReference<Class> */ temp1 = dest->klass_
2819 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2820 temp1_loc,
2821 dest.W(),
2822 class_offset,
2823 temp3_loc,
2824 /* needs_null_check= */ false,
2825 /* use_load_acquire= */ false);
2826
2827 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2828 // Bail out if the destination is not a non primitive array.
2829 //
2830 // Register `temp1` is not trashed by the read barrier emitted
2831 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2832 // method produces a call to a ReadBarrierMarkRegX entry point,
2833 // which saves all potentially live registers, including
2834 // temporaries such as `temp1`.
2835 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2836 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2837 temp2_loc,
2838 temp1,
2839 component_offset,
2840 temp3_loc,
2841 /* needs_null_check= */ false,
2842 /* use_load_acquire= */ false);
2843 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2844 // If heap poisoning is enabled, `temp2` has been unpoisoned
2845 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2846 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2847 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2848 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2849 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2850 }
2851
2852 // For the same reason given earlier, `temp1` is not trashed by the
2853 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2854 // /* HeapReference<Class> */ temp2 = src->klass_
2855 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2856 temp2_loc,
2857 src.W(),
2858 class_offset,
2859 temp3_loc,
2860 /* needs_null_check= */ false,
2861 /* use_load_acquire= */ false);
2862 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2863 __ Cmp(temp1, temp2);
2864
2865 if (optimizations.GetDestinationIsTypedObjectArray()) {
2866 vixl::aarch64::Label do_copy;
2867 __ B(&do_copy, eq);
2868 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2869 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2870 temp1_loc,
2871 temp1,
2872 component_offset,
2873 temp3_loc,
2874 /* needs_null_check= */ false,
2875 /* use_load_acquire= */ false);
2876 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2877 // We do not need to emit a read barrier for the following
2878 // heap reference load, as `temp1` is only used in a
2879 // comparison with null below, and this reference is not
2880 // kept afterwards.
2881 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2882 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2883 __ Bind(&do_copy);
2884 } else {
2885 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2886 }
2887 } else {
2888 // Non read barrier code.
2889
2890 // /* HeapReference<Class> */ temp1 = dest->klass_
2891 __ Ldr(temp1, MemOperand(dest, class_offset));
2892 // /* HeapReference<Class> */ temp2 = src->klass_
2893 __ Ldr(temp2, MemOperand(src, class_offset));
2894 bool did_unpoison = false;
2895 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2896 !optimizations.GetSourceIsNonPrimitiveArray()) {
2897 // One or two of the references need to be unpoisoned. Unpoison them
2898 // both to make the identity check valid.
2899 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2900 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2901 did_unpoison = true;
2902 }
2903
2904 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2905 // Bail out if the destination is not a non primitive array.
2906 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2907 __ Ldr(temp3, HeapOperand(temp1, component_offset));
2908 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2909 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2910 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2911 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2912 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2913 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2914 }
2915
2916 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2917 // Bail out if the source is not a non primitive array.
2918 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2919 __ Ldr(temp3, HeapOperand(temp2, component_offset));
2920 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2921 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2922 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2923 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2924 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2925 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2926 }
2927
2928 __ Cmp(temp1, temp2);
2929
2930 if (optimizations.GetDestinationIsTypedObjectArray()) {
2931 vixl::aarch64::Label do_copy;
2932 __ B(&do_copy, eq);
2933 if (!did_unpoison) {
2934 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2935 }
2936 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2937 __ Ldr(temp1, HeapOperand(temp1, component_offset));
2938 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2939 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2940 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2941 // No need to unpoison the result, we're comparing against null.
2942 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2943 __ Bind(&do_copy);
2944 } else {
2945 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2946 }
2947 }
2948 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2949 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2950 // Bail out if the source is not a non primitive array.
2951 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2952 // /* HeapReference<Class> */ temp1 = src->klass_
2953 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2954 temp1_loc,
2955 src.W(),
2956 class_offset,
2957 temp3_loc,
2958 /* needs_null_check= */ false,
2959 /* use_load_acquire= */ false);
2960 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2961 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2962 temp2_loc,
2963 temp1,
2964 component_offset,
2965 temp3_loc,
2966 /* needs_null_check= */ false,
2967 /* use_load_acquire= */ false);
2968 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2969 // If heap poisoning is enabled, `temp2` has been unpoisoned
2970 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2971 } else {
2972 // /* HeapReference<Class> */ temp1 = src->klass_
2973 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2974 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2975 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2976 __ Ldr(temp2, HeapOperand(temp1, component_offset));
2977 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2978 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2979 }
2980 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2981 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2982 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2983 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2984 }
2985
2986 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2987 // Zero constant length: no need to emit the loop code at all.
2988 } else {
2989 Register src_curr_addr = temp1.X();
2990 Register dst_curr_addr = temp2.X();
2991 Register src_stop_addr = temp3.X();
2992 vixl::aarch64::Label done;
2993 const DataType::Type type = DataType::Type::kReference;
2994 const int32_t element_size = DataType::Size(type);
2995
2996 if (length.IsRegister()) {
2997 // Don't enter the copy loop if the length is zero.
2998 __ Cbz(WRegisterFrom(length), &done);
2999 }
3000
3001 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3002 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3003
3004 // SystemArrayCopy implementation for Baker read barriers (see
3005 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
3006 //
3007 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3008 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3009 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3010 // if (is_gray) {
3011 // // Slow-path copy.
3012 // do {
3013 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3014 // } while (src_ptr != end_ptr)
3015 // } else {
3016 // // Fast-path copy.
3017 // do {
3018 // *dest_ptr++ = *src_ptr++;
3019 // } while (src_ptr != end_ptr)
3020 // }
3021
3022 // Make sure `tmp` is not IP0, as it is clobbered by
3023 // ReadBarrierMarkRegX entry points in
3024 // ReadBarrierSystemArrayCopySlowPathARM64.
3025 DCHECK(temps.IsAvailable(ip0));
3026 temps.Exclude(ip0);
3027 Register tmp = temps.AcquireW();
3028 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
3029 // Put IP0 back in the pool so that VIXL has at least one
3030 // scratch register available to emit macro-instructions (note
3031 // that IP1 is already used for `tmp`). Indeed some
3032 // macro-instructions used in GenSystemArrayCopyAddresses
3033 // (invoked hereunder) may require a scratch register (for
3034 // instance to emit a load with a large constant offset).
3035 temps.Include(ip0);
3036
3037 // /* int32_t */ monitor = src->monitor_
3038 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3039 // /* LockWord */ lock_word = LockWord(monitor)
3040 static_assert(sizeof(LockWord) == sizeof(int32_t),
3041 "art::LockWord and int32_t have different sizes.");
3042
3043 // Introduce a dependency on the lock_word including rb_state,
3044 // to prevent load-load reordering, and without using
3045 // a memory barrier (which would be more expensive).
3046 // `src` is unchanged by this operation, but its value now depends
3047 // on `tmp`.
3048 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
3049
3050 // Compute base source address, base destination address, and end
3051 // source address for System.arraycopy* intrinsics in `src_base`,
3052 // `dst_base` and `src_end` respectively.
3053 // Note that `src_curr_addr` is computed from `src` (and
3054 // `src_pos`) here, and thus honors the artificial dependency
3055 // of `src` on `tmp`.
3056 GenSystemArrayCopyAddresses(masm,
3057 type,
3058 src,
3059 src_pos,
3060 dest,
3061 dest_pos,
3062 length,
3063 src_curr_addr,
3064 dst_curr_addr,
3065 src_stop_addr);
3066
3067 // Slow path used to copy array when `src` is gray.
3068 SlowPathCodeARM64* read_barrier_slow_path =
3069 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3070 invoke, LocationFrom(tmp));
3071 codegen_->AddSlowPath(read_barrier_slow_path);
3072
3073 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3074 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3075 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3076 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3077
3078 // Fast-path copy.
3079 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3080 // poison/unpoison.
3081 vixl::aarch64::Label loop;
3082 __ Bind(&loop);
3083 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3084 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3085 __ Cmp(src_curr_addr, src_stop_addr);
3086 __ B(&loop, ne);
3087
3088 __ Bind(read_barrier_slow_path->GetExitLabel());
3089 } else {
3090 // Non read barrier code.
3091 // Compute base source address, base destination address, and end
3092 // source address for System.arraycopy* intrinsics in `src_base`,
3093 // `dst_base` and `src_end` respectively.
3094 GenSystemArrayCopyAddresses(masm,
3095 type,
3096 src,
3097 src_pos,
3098 dest,
3099 dest_pos,
3100 length,
3101 src_curr_addr,
3102 dst_curr_addr,
3103 src_stop_addr);
3104 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3105 // poison/unpoison.
3106 vixl::aarch64::Label loop;
3107 __ Bind(&loop);
3108 {
3109 Register tmp = temps.AcquireW();
3110 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3111 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3112 }
3113 __ Cmp(src_curr_addr, src_stop_addr);
3114 __ B(&loop, ne);
3115 }
3116 __ Bind(&done);
3117 }
3118 }
3119
3120 // We only need one card marking on the destination array.
3121 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
3122
3123 __ Bind(intrinsic_slow_path->GetExitLabel());
3124 }
3125
3126 static void GenIsInfinite(LocationSummary* locations,
3127 bool is64bit,
3128 MacroAssembler* masm) {
3129 Operand infinity(0);
3130 Operand tst_mask(0);
3131 Register out;
3132
3133 if (is64bit) {
3134 infinity = Operand(kPositiveInfinityDouble);
3135 tst_mask = MaskLeastSignificant<uint64_t>(63);
3136 out = XRegisterFrom(locations->Out());
3137 } else {
3138 infinity = Operand(kPositiveInfinityFloat);
3139 tst_mask = MaskLeastSignificant<uint32_t>(31);
3140 out = WRegisterFrom(locations->Out());
3141 }
3142
3143 MoveFPToInt(locations, is64bit, masm);
3144 // Checks whether exponent bits are all 1 and fraction bits are all 0.
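// XORing with the +infinity bit pattern leaves at most the sign bit set for +/-infinity;
// any other input leaves some exponent or fraction bit set.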
3145 __ Eor(out, out, infinity);
3146 // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3147 // depending on is64bit.
3148 __ Tst(out, tst_mask);
3149 __ Cset(out, eq);
3150 }
3151
3152 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3153 CreateFPToIntLocations(allocator_, invoke);
3154 }
3155
3156 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
3157 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
3158 }
3159
3160 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3161 CreateFPToIntLocations(allocator_, invoke);
3162 }
3163
3164 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
3165 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
3166 }
3167
3168 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
3169 InvokeRuntimeCallingConvention calling_convention;
3170 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3171 invoke,
3172 codegen_,
3173 calling_convention.GetReturnLocation(DataType::Type::kReference),
3174 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3175 }
3176
3177 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
3178 IntrinsicVisitor::IntegerValueOfInfo info =
3179 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3180 LocationSummary* locations = invoke->GetLocations();
3181 MacroAssembler* masm = GetVIXLAssembler();
3182
3183 Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
3184 UseScratchRegisterScope temps(masm);
3185 Register temp = temps.AcquireW();
3186 auto allocate_instance = [&]() {
3187 DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3188 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3189 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3190 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3191 };
3192 if (invoke->InputAt(0)->IsConstant()) {
3193 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3194 if (static_cast<uint32_t>(value - info.low) < info.length) {
3195 // Just embed the j.l.Integer in the code.
3196 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3197 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3198 } else {
3199 DCHECK(locations->CanCall());
3200 // Allocate and initialize a new j.l.Integer.
3201 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3202 // JIT object table.
3203 allocate_instance();
3204 __ Mov(temp.W(), value);
3205 __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
3206 // Class pointer and `value` final field stores require a barrier before publication.
3207 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3208 }
3209 } else {
3210 DCHECK(locations->CanCall());
3211 Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
3212 // Check bounds of our cache.
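// The bounds check is a single unsigned comparison; any value below info.low wraps to a
// large unsigned number, so both out-of-range sides take the allocation path, mirroring
// the constant path above: static_cast<uint32_t>(value - info.low) < info.length.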
3213 __ Add(out.W(), in.W(), -info.low);
3214 __ Cmp(out.W(), info.length);
3215 vixl::aarch64::Label allocate, done;
3216 __ B(&allocate, hs);
3217 // If the value is within the bounds, load the j.l.Integer directly from the array.
3218 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
3219 MemOperand source = HeapOperand(
3220 temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3221 codegen_->Load(DataType::Type::kReference, out, source);
3222 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3223 __ B(&done);
3224 __ Bind(&allocate);
3225 // Otherwise allocate and initialize a new j.l.Integer.
3226 allocate_instance();
3227 __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
3228 // Class pointer and `value` final field stores require a barrier before publication.
3229 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3230 __ Bind(&done);
3231 }
3232 }
3233
3234 void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3235 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3236
3237 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
3238 invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3239 }
3240 }
3241
3242 void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3243 MacroAssembler* masm = GetVIXLAssembler();
3244 LocationSummary* locations = invoke->GetLocations();
3245
3246 Location obj = locations->InAt(0);
3247 Location out = locations->Out();
3248
3249 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3250 codegen_->AddSlowPath(slow_path);
3251
3252 if (kEmitCompilerReadBarrier) {
3253 // Check self->GetWeakRefAccessEnabled().
3254 UseScratchRegisterScope temps(masm);
3255 Register temp = temps.AcquireW();
3256 __ Ldr(temp,
3257 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
3258 __ Cbz(temp, slow_path->GetEntryLabel());
3259 }
3260
3261 {
3262 // Load the java.lang.ref.Reference class.
3263 UseScratchRegisterScope temps(masm);
3264 Register temp = temps.AcquireW();
3265 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3266
3267 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3268 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3269 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3270 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3271 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3272 __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3273 __ Cbnz(temp, slow_path->GetEntryLabel());
3274 }
3275
3276 // Load the value from the field.
3277 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3278 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3279 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3280 out,
3281 WRegisterFrom(obj),
3282 referent_offset,
3283 /*maybe_temp=*/ locations->GetTemp(0),
3284 /*needs_null_check=*/ true,
3285 /*use_load_acquire=*/ true);
3286 } else {
3287 MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
3288 codegen_->LoadAcquire(
3289 invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
3290 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3291 }
3292 __ Bind(slow_path->GetExitLabel());
3293 }
3294
3295 void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3296 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3297 }
3298
3299 void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3300 LocationSummary* locations = invoke->GetLocations();
3301 MacroAssembler* masm = codegen_->GetVIXLAssembler();
3302 UseScratchRegisterScope temps(masm);
3303
3304 Register obj = WRegisterFrom(locations->InAt(0));
3305 Register other = WRegisterFrom(locations->InAt(1));
3306 Register out = WRegisterFrom(locations->Out());
3307 Register tmp = temps.AcquireW();
3308
3309 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3310 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3311
3312 MemOperand field = HeapOperand(obj, referent_offset);
3313 codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
3314 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
3315
3316 __ Cmp(tmp, other);
3317
3318 if (kEmitCompilerReadBarrier) {
3319 DCHECK(kUseBakerReadBarrier);
3320
3321 vixl::aarch64::Label calculate_result;
3322
3323 // If the GC is not marking, the comparison result is final.
3324 __ Cbz(mr, &calculate_result);
3325
3326 __ B(&calculate_result, eq); // ZF set if taken.
3327
3328 // Check if the loaded reference is null.
3329 __ Cbz(tmp, &calculate_result); // ZF clear if taken.
3330
3331 // For correct memory visibility, we need a barrier before loading the lock word.
3332 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3333
3334 // Load the lockword and check if it is a forwarding address.
3335 static_assert(LockWord::kStateShift == 30u);
3336 static_assert(LockWord::kStateForwardingAddress == 3u);
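// With the state stored in the two most significant bits, a forwarding-address lock word
// is exactly one whose unsigned value is >= 0xc0000000, which the comparison below tests.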
3337 __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
3338 __ Cmp(tmp, Operand(0xc0000000));
3339 __ B(&calculate_result, lo); // ZF clear if taken.
3340
3341 // Extract the forwarding address and compare with `other`.
3342 __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
3343
3344 __ Bind(&calculate_result);
3345 }
3346
3347 // Convert ZF into the Boolean result.
3348 __ Cset(out, eq);
3349 }
3350
3351 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
3352 LocationSummary* locations =
3353 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3354 locations->SetOut(Location::RequiresRegister());
3355 }
3356
3357 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3358 MacroAssembler* masm = GetVIXLAssembler();
3359 Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
3360 UseScratchRegisterScope temps(masm);
3361 Register temp = temps.AcquireX();
3362
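// Read the thread's interrupted flag with load-acquire; if it is set, clear it below
// with a store-release so the clear is ordered after the read.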
3363 __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3364 __ Ldar(out.W(), MemOperand(temp));
3365
3366 vixl::aarch64::Label done;
3367 __ Cbz(out.W(), &done);
3368 __ Stlr(wzr, MemOperand(temp));
3369 __ Bind(&done);
3370 }
3371
3372 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3373 LocationSummary* locations =
3374 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3375 locations->SetInAt(0, Location::Any());
3376 }
3377
3378 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3379
3380 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3381 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3382 return;
3383 }
3384
3385 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3386 LocationSummary::kNoCall,
3387 kIntrinsified);
3388
3389 locations->SetInAt(0, Location::RequiresRegister());
3390 locations->SetInAt(1, Location::RequiresRegister());
3391 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3392 }
3393
3394 // Lower the invoke of CRC32.update(int crc, int b).
3395 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3396 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3397
3398 MacroAssembler* masm = GetVIXLAssembler();
3399
3400 Register crc = InputRegisterAt(invoke, 0);
3401 Register val = InputRegisterAt(invoke, 1);
3402 Register out = OutputRegister(invoke);
3403
3404 // The general algorithm of the CRC32 calculation is:
3405 // crc = ~crc
3406 // result = crc32_for_byte(crc, b)
3407 // crc = ~result
3408 // It is directly lowered to three instructions.
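// Below, Mvn implements the first `crc = ~crc`, Crc32b folds in the byte, and the final
// Mvn implements `crc = ~result`.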
3409
3410 UseScratchRegisterScope temps(masm);
3411 Register tmp = temps.AcquireSameSizeAs(out);
3412
3413 __ Mvn(tmp, crc);
3414 __ Crc32b(tmp, tmp, val);
3415 __ Mvn(out, tmp);
3416 }
3417
3418 // Generate code using CRC32 instructions which calculates
3419 // the CRC32 checksum of an array of bytes.
3420 //
3421 // Parameters:
3422 // masm - VIXL macro assembler
3423 // crc - a register holding an initial CRC value
3424 // ptr - a register holding a memory address of bytes
3425 // length - a register holding a number of bytes to process
3426 // out - a register to put a result of calculation
3427 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3428 const Register& crc,
3429 const Register& ptr,
3430 const Register& length,
3431 const Register& out) {
3432 // The algorithm of CRC32 of bytes is:
3433 // crc = ~crc
3434 // process a few first bytes to make the array 8-byte aligned
3435 // while array has 8 bytes do:
3436 // crc = crc32_of_8bytes(crc, 8_bytes(array))
3437 // if array has 4 bytes:
3438 // crc = crc32_of_4bytes(crc, 4_bytes(array))
3439 // if array has 2 bytes:
3440 // crc = crc32_of_2bytes(crc, 2_bytes(array))
3441 // if array has a byte:
3442 // crc = crc32_of_byte(crc, 1_byte(array))
3443 // crc = ~crc
3444
3445 vixl::aarch64::Label loop, done;
3446 vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3447 vixl::aarch64::Label aligned2, aligned4, aligned8;
3448
3449 // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3450 // instructions below.
3451 UseScratchRegisterScope temps(masm);
3452 Register len = temps.AcquireW();
3453 Register array_elem = temps.AcquireW();
3454
3455 __ Mvn(out, crc);
3456 __ Mov(len, length);
3457
3458 __ Tbz(ptr, 0, &aligned2);
3459 __ Subs(len, len, 1);
3460 __ B(&done, lo);
3461 __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3462 __ Crc32b(out, out, array_elem);
3463
3464 __ Bind(&aligned2);
3465 __ Tbz(ptr, 1, &aligned4);
3466 __ Subs(len, len, 2);
3467 __ B(&process_1byte, lo);
3468 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3469 __ Crc32h(out, out, array_elem);
3470
3471 __ Bind(&aligned4);
3472 __ Tbz(ptr, 2, &aligned8);
3473 __ Subs(len, len, 4);
3474 __ B(&process_2bytes, lo);
3475 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3476 __ Crc32w(out, out, array_elem);
3477
3478 __ Bind(&aligned8);
3479 __ Subs(len, len, 8);
3480 // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3481 __ B(&process_4bytes, lo);
3482
3483 // The main loop processing data by 8 bytes.
3484 __ Bind(&loop);
3485 __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3486 __ Subs(len, len, 8);
3487 __ Crc32x(out, out, array_elem.X());
3488 // if len >= 8, process the next 8 bytes.
3489 __ B(&loop, hs);
3490
3491 // Process the data which is less than 8 bytes.
3492 // The code generated below works with values of len
3493 // which come in the range [-8, 0].
3494 // The first three bits are used to detect whether 4 bytes or 2 bytes or
3495 // a byte can be processed.
3496 // The checking order is from bit 2 to bit 0:
3497 // bit 2 is set: at least 4 bytes available
3498 // bit 1 is set: at least 2 bytes available
3499 // bit 0 is set: at least a byte available
3500 __ Bind(&process_4bytes);
3501 // Goto process_2bytes if less than four bytes available
3502 __ Tbz(len, 2, &process_2bytes);
3503 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3504 __ Crc32w(out, out, array_elem);
3505
3506 __ Bind(&process_2bytes);
3507 // Goto process_1byte if less than two bytes available
3508 __ Tbz(len, 1, &process_1byte);
3509 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3510 __ Crc32h(out, out, array_elem);
3511
3512 __ Bind(&process_1byte);
3513 // Goto done if no bytes available
3514 __ Tbz(len, 0, &done);
3515 __ Ldrb(array_elem, MemOperand(ptr));
3516 __ Crc32b(out, out, array_elem);
3517
3518 __ Bind(&done);
3519 __ Mvn(out, out);
3520 }
3521
3522 // The threshold for sizes of arrays to use the library provided implementation
3523 // of CRC32.updateBytes instead of the intrinsic.
3524 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3525
3526 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3527 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3528 return;
3529 }
3530
3531 LocationSummary* locations =
3532 new (allocator_) LocationSummary(invoke,
3533 LocationSummary::kCallOnSlowPath,
3534 kIntrinsified);
3535
3536 locations->SetInAt(0, Location::RequiresRegister());
3537 locations->SetInAt(1, Location::RequiresRegister());
3538 locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3539 locations->SetInAt(3, Location::RequiresRegister());
3540 locations->AddTemp(Location::RequiresRegister());
3541 locations->SetOut(Location::RequiresRegister());
3542 }
3543
3544 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3545 //
3546 // Note: The intrinsic is not used if len exceeds a threshold.
3547 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3548 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3549
3550 MacroAssembler* masm = GetVIXLAssembler();
3551 LocationSummary* locations = invoke->GetLocations();
3552
3553 SlowPathCodeARM64* slow_path =
3554 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3555 codegen_->AddSlowPath(slow_path);
3556
3557 Register length = WRegisterFrom(locations->InAt(3));
3558 __ Cmp(length, kCRC32UpdateBytesThreshold);
3559 __ B(slow_path->GetEntryLabel(), hi);
3560
3561 const uint32_t array_data_offset =
3562 mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3563 Register ptr = XRegisterFrom(locations->GetTemp(0));
3564 Register array = XRegisterFrom(locations->InAt(1));
3565 Location offset = locations->InAt(2);
3566 if (offset.IsConstant()) {
3567 int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3568 __ Add(ptr, array, array_data_offset + offset_value);
3569 } else {
3570 __ Add(ptr, array, array_data_offset);
3571 __ Add(ptr, ptr, XRegisterFrom(offset));
3572 }
3573
3574 Register crc = WRegisterFrom(locations->InAt(0));
3575 Register out = WRegisterFrom(locations->Out());
3576
3577 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3578
3579 __ Bind(slow_path->GetExitLabel());
3580 }
3581
3582 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3583 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3584 return;
3585 }
3586
3587 LocationSummary* locations =
3588 new (allocator_) LocationSummary(invoke,
3589 LocationSummary::kNoCall,
3590 kIntrinsified);
3591
3592 locations->SetInAt(0, Location::RequiresRegister());
3593 locations->SetInAt(1, Location::RequiresRegister());
3594 locations->SetInAt(2, Location::RequiresRegister());
3595 locations->SetInAt(3, Location::RequiresRegister());
3596 locations->AddTemp(Location::RequiresRegister());
3597 locations->SetOut(Location::RequiresRegister());
3598 }
3599
3600 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3601 //
3602 // There is no need to generate code checking if addr is 0.
3603 // The method updateByteBuffer is a private method of java.util.zip.CRC32.
3604 // This guarantees no calls outside of the CRC32 class.
3605 // An address of DirectBuffer is always passed to the call of updateByteBuffer.
3606 // An empty DirectBuffer implementation might use a zero address, but in that
3607 // case the length must also be zero, and the generated code handles a zero
3608 // length correctly.
3609 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3610 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3611
3612 MacroAssembler* masm = GetVIXLAssembler();
3613 LocationSummary* locations = invoke->GetLocations();
3614
3615 Register addr = XRegisterFrom(locations->InAt(1));
3616 Register ptr = XRegisterFrom(locations->GetTemp(0));
3617 __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3618
3619 Register crc = WRegisterFrom(locations->InAt(0));
3620 Register length = WRegisterFrom(locations->InAt(3));
3621 Register out = WRegisterFrom(locations->Out());
3622 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3623 }
3624
3625 void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3626 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3627 return;
3628 }
3629
3630 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3631 LocationSummary::kNoCall,
3632 kIntrinsified);
3633 locations->SetInAt(0, Location::RequiresRegister());
3634 locations->SetOut(Location::RequiresFpuRegister());
3635 }
3636
3637 void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3638 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3639 MacroAssembler* masm = GetVIXLAssembler();
3640 UseScratchRegisterScope scratch_scope(masm);
3641 Register bits = InputRegisterAt(invoke, 0);
3642 VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3643 VRegister half = scratch_scope.AcquireH();
3644 __ Fmov(half, bits); // ARMv8.2
3645 __ Fcvt(out, half);
3646 }
3647
3648 void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3649 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3650 return;
3651 }
3652
3653 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3654 LocationSummary::kNoCall,
3655 kIntrinsified);
3656 locations->SetInAt(0, Location::RequiresFpuRegister());
3657 locations->SetOut(Location::RequiresRegister());
3658 }
3659
3660 void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3661 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3662 MacroAssembler* masm = GetVIXLAssembler();
3663 UseScratchRegisterScope scratch_scope(masm);
3664 VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3665 VRegister half = scratch_scope.AcquireH();
3666 Register out = WRegisterFrom(invoke->GetLocations()->Out());
3667 __ Fcvt(half, in);
3668 __ Fmov(out, half);
3669 __ Sxth(out, out); // sign extend due to returning a short type.
3670 }
3671
3672 template<typename OP>
3673 void GenerateFP16Round(HInvoke* invoke,
3674 CodeGeneratorARM64* const codegen_,
3675 MacroAssembler* masm,
3676 const OP roundOp) {
3677 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3678 LocationSummary* locations = invoke->GetLocations();
3679 UseScratchRegisterScope scratch_scope(masm);
3680 Register out = WRegisterFrom(locations->Out());
3681 VRegister half = scratch_scope.AcquireH();
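// Move the FP16 bits into a scalar H register, round there, then move the result back
// and sign-extend it, as the FP16 value is returned as a Java short.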
3682 __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3683 roundOp(half, half);
3684 __ Fmov(out, half);
3685 __ Sxth(out, out);
3686 }
3687
3688 void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3689 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3690 return;
3691 }
3692
3693 CreateIntToIntLocations(allocator_, invoke);
3694 }
3695
3696 void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3697 MacroAssembler* masm = GetVIXLAssembler();
3698 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3699 __ Frintm(out, in); // Round towards Minus infinity
3700 };
3701 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3702 }
3703
3704 void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
3705 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3706 return;
3707 }
3708
3709 CreateIntToIntLocations(allocator_, invoke);
3710 }
3711
3712 void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
3713 MacroAssembler* masm = GetVIXLAssembler();
3714 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3715 __ Frintp(out, in); // Round towards Plus infinity
3716 };
3717 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3718 }
3719
3720 void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
3721 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3722 return;
3723 }
3724
3725 CreateIntToIntLocations(allocator_, invoke);
3726 }
3727
3728 void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
3729 MacroAssembler* masm = GetVIXLAssembler();
3730 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3731 __ Frintn(out, in); // Round to nearest, with ties to even
3732 };
3733 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3734 }
3735
3736 template<typename OP>
3737 void GenerateFP16Compare(HInvoke* invoke,
3738 CodeGeneratorARM64* codegen,
3739 MacroAssembler* masm,
3740 const OP compareOp) {
3741 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
3742 LocationSummary* locations = invoke->GetLocations();
3743 Register out = WRegisterFrom(locations->Out());
3744 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
3745 VRegister half1 = HRegisterFrom(locations->GetTemp(1));
3746 __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
3747 __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
3748 compareOp(out, half0, half1);
3749 }
3750
3751 static inline void GenerateFP16Compare(HInvoke* invoke,
3752 CodeGeneratorARM64* codegen,
3753 MacroAssembler* masm,
3754 vixl::aarch64::Condition cond) {
3755 auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
3756 __ Fcmp(in0, in1);
3757 __ Cset(out, cond);
3758 };
3759 GenerateFP16Compare(invoke, codegen, masm, compareOp);
3760 }
3761
3762 void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
3763 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3764 return;
3765 }
3766
3767 CreateIntIntToIntLocations(allocator_, invoke);
3768 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3769 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3770 }
3771
3772 void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
3773 MacroAssembler* masm = GetVIXLAssembler();
3774 GenerateFP16Compare(invoke, codegen_, masm, gt);
3775 }
3776
3777 void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3778 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3779 return;
3780 }
3781
3782 CreateIntIntToIntLocations(allocator_, invoke);
3783 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3784 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3785 }
3786
3787 void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3788 MacroAssembler* masm = GetVIXLAssembler();
3789 GenerateFP16Compare(invoke, codegen_, masm, ge);
3790 }
3791
3792 void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
3793 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3794 return;
3795 }
3796
3797 CreateIntIntToIntLocations(allocator_, invoke);
3798 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3799 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3800 }
3801
3802 void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
3803 MacroAssembler* masm = GetVIXLAssembler();
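// `mi` is used instead of `lt`: an unordered (NaN) FP comparison clears N and sets C and V,
// so `mi` yields false for a NaN operand while `lt` would yield true.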
3804 GenerateFP16Compare(invoke, codegen_, masm, mi);
3805 }
3806
3807 void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
3808 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3809 return;
3810 }
3811
3812 CreateIntIntToIntLocations(allocator_, invoke);
3813 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3814 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3815 }
3816
3817 void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
3818 MacroAssembler* masm = GetVIXLAssembler();
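// `ls` is used instead of `le`: an unordered (NaN) FP comparison sets C and clears Z,
// so `ls` yields false for a NaN operand while `le` would yield true.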
3819 GenerateFP16Compare(invoke, codegen_, masm, ls);
3820 }
3821
3822 static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
3823 LocationSummary* locations = invoke->GetLocations();
3824 MacroAssembler* masm = codegen->GetVIXLAssembler();
3825 DataType::Type type = invoke->GetType();
3826 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3827
3828 Register dividend = RegisterFrom(locations->InAt(0), type);
3829 Register divisor = RegisterFrom(locations->InAt(1), type);
3830 Register out = RegisterFrom(locations->Out(), type);
3831
3832 // Check if divisor is zero, bail to managed implementation to handle.
3833 SlowPathCodeARM64* slow_path =
3834 new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3835 codegen->AddSlowPath(slow_path);
3836 __ Cbz(divisor, slow_path->GetEntryLabel());
3837
3838 __ Udiv(out, dividend, divisor);
3839
3840 __ Bind(slow_path->GetExitLabel());
3841 }
3842
3843 void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3844 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
3845 }
3846
3847 void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3848 GenerateDivideUnsigned(invoke, codegen_);
3849 }
3850
3851 void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
3852 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
3853 }
3854
3855 void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
3856 GenerateDivideUnsigned(invoke, codegen_);
3857 }
3858
3859 void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
3860 CreateIntIntToIntLocations(allocator_, invoke);
3861 }
3862
3863 void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
3864 LocationSummary* locations = invoke->GetLocations();
3865 MacroAssembler* masm = codegen_->GetVIXLAssembler();
3866 DataType::Type type = invoke->GetType();
3867 DCHECK(type == DataType::Type::kInt64);
3868
3869 Register x = RegisterFrom(locations->InAt(0), type);
3870 Register y = RegisterFrom(locations->InAt(1), type);
3871 Register out = RegisterFrom(locations->Out(), type);
3872
3873 __ Smulh(out, x, y);
3874 }
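// SMULH writes the upper 64 bits of the 128-bit product of two signed 64-bit values,
// which is exactly what Math.multiplyHigh(long, long) returns, so a single instruction
// suffices and no slow path is needed.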
3875
3876 class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
3877 public:
3878 VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
3879 : IntrinsicSlowPathARM64(invoke),
3880 order_(order),
3881 return_success_(false),
3882 strong_(false),
3883 get_and_update_op_(GetAndUpdateOp::kAdd) {
3884 }
3885
3886 vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
3887 return &byte_array_view_check_label_;
3888 }
3889
3890 vixl::aarch64::Label* GetNativeByteOrderLabel() {
3891 return &native_byte_order_label_;
3892 }
3893
3894 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3895 if (return_success) {
3896 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3897 } else {
3898 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3899 }
3900 return_success_ = return_success;
3901 strong_ = strong;
3902 }
3903
3904 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3905 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3906 get_and_update_op_ = get_and_update_op;
3907 }
3908
3909 void EmitNativeCode(CodeGenerator* codegen_in) override {
3910 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3911 EmitByteArrayViewCode(codegen_in);
3912 }
3913 IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
3914 }
3915
3916 private:
3917 HInvoke* GetInvoke() const {
3918 return GetInstruction()->AsInvoke();
3919 }
3920
3921 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3922 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3923 }
3924
3925 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
3926
3927 vixl::aarch64::Label byte_array_view_check_label_;
3928 vixl::aarch64::Label native_byte_order_label_;
3929 // Shared parameter for all VarHandle intrinsics.
3930 std::memory_order order_;
3931 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
3932 bool return_success_;
3933 bool strong_;
3934 // Extra argument for GenerateVarHandleGetAndUpdate().
3935 GetAndUpdateOp get_and_update_op_;
3936 };
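// The slow path class above extends the generic intrinsic slow path with two extra entry
// points used by the VarHandle fast paths below:
//   - GetByteArrayViewCheckLabel(): taken when the array check sees a primitive-type
//     mismatch that may still be a valid byte-array-view access (see
//     GenerateVarHandleArrayChecks()); the view handling is emitted out of line by
//     EmitByteArrayViewCode(), and only if the label was actually linked.
//   - GetNativeByteOrderLabel(): bound in the fast path right after the checks, so the
//     byte-array-view code can branch back for accesses in the native byte order.
// The remaining fields record which flavor of access (memory order, strong/weak CAS vs.
// exchange, get-and-update operation) the fast path was compiled for.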
3937
3938 // Generate subtype check without read barriers.
3939 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
3940 SlowPathCodeARM64* slow_path,
3941 Register object,
3942 Register type,
3943 bool object_can_be_null = true) {
3944 MacroAssembler* masm = codegen->GetVIXLAssembler();
3945
3946 const MemberOffset class_offset = mirror::Object::ClassOffset();
3947 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3948
3949 vixl::aarch64::Label success;
3950 if (object_can_be_null) {
3951 __ Cbz(object, &success);
3952 }
3953
3954 UseScratchRegisterScope temps(masm);
3955 Register temp = temps.AcquireW();
3956
3957 __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
3958 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3959 vixl::aarch64::Label loop;
3960 __ Bind(&loop);
3961 __ Cmp(type, temp);
3962 __ B(&success, eq);
3963 __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
3964 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3965 __ Cbz(temp, slow_path->GetEntryLabel());
3966 __ B(&loop);
3967 __ Bind(&success);
3968 }
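// The check above walks the superclass chain without read barriers; a concurrent copying
// collector can therefore make it fail spuriously, which is why callers treat a miss as
// "go to the slow path" rather than "throw". Approximate shape of the emitted code
// (heap reference unpoisoning omitted for brevity):
//
//     cbz  object, success            // only if object_can_be_null
//     ldr  temp, [object, #class]     // temp = object->klass_
//   loop:
//     cmp  type, temp
//     b.eq success
//     ldr  temp, [temp, #super_class] // temp = temp->super_class_
//     cbz  temp, <slow path entry>    // ran out of superclasses: not a subtype here
//     b    loop
//   success: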
3969
3970 // Check access mode and the primitive type from VarHandle.varType.
3971 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3972 // check without read barrier, so it can have false negatives which we handle in the slow path.
3973 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3974 CodeGeneratorARM64* codegen,
3975 SlowPathCodeARM64* slow_path,
3976 DataType::Type type) {
3977 mirror::VarHandle::AccessMode access_mode =
3978 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3979 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3980
3981 MacroAssembler* masm = codegen->GetVIXLAssembler();
3982 Register varhandle = InputRegisterAt(invoke, 0);
3983
3984 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3985 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3986 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3987
3988 UseScratchRegisterScope temps(masm);
3989 Register var_type_no_rb = temps.AcquireW();
3990 Register temp2 = temps.AcquireW();
3991
3992 // Check that the operation is permitted and check the primitive type of varhandle.varType.
3993 // We do not need a read barrier when loading a reference only to load a constant
3994 // primitive field through that reference. Use LDP to load the two fields together.
3995 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
3996 __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
3997 codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
3998 __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
3999 __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4000 if (primitive_type == Primitive::kPrimNot) {
4001 static_assert(Primitive::kPrimNot == 0);
4002 __ Cbnz(temp2, slow_path->GetEntryLabel());
4003 } else {
4004 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4005 __ B(slow_path->GetEntryLabel(), ne);
4006 }
4007
4008 temps.Release(temp2);
4009
4010 if (type == DataType::Type::kReference) {
4011 // Check reference arguments against the varType.
4012 // False negatives due to varType being an interface or array type
4013 // or due to the missing read barrier are handled by the slow path.
4014 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4015 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4016 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4017 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4018 HInstruction* arg = invoke->InputAt(arg_index);
4019 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4020 if (!arg->IsNullConstant()) {
4021 Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4022 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4023 }
4024 }
4025 }
4026 }
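// Summary of the checks emitted above: a single LDP loads `varType` and `accessModesBitMask`
// (the DCHECK_EQ documents that the two fields are adjacent), TBZ tests the bit for the
// requested access mode, and the primitive type of `varType` is compared against the expected
// type (with kPrimNot encoded as 0, a CBNZ suffices for references). For reference arguments,
// the subtype check above is reused; its false negatives are resolved in the slow path with
// proper read barriers.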
4027
4028 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4029 CodeGeneratorARM64* codegen,
4030 SlowPathCodeARM64* slow_path) {
4031 MacroAssembler* masm = codegen->GetVIXLAssembler();
4032 Register varhandle = InputRegisterAt(invoke, 0);
4033
4034 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4035
4036 UseScratchRegisterScope temps(masm);
4037 Register temp = temps.AcquireW();
4038
4039 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4040 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4041 __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4042 __ Cbnz(temp, slow_path->GetEntryLabel());
4043 }
4044
4045 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4046 CodeGeneratorARM64* codegen,
4047 SlowPathCodeARM64* slow_path) {
4048 MacroAssembler* masm = codegen->GetVIXLAssembler();
4049 Register varhandle = InputRegisterAt(invoke, 0);
4050 Register object = InputRegisterAt(invoke, 1);
4051
4052 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4053 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4054
4055 // Null-check the object.
4056 __ Cbz(object, slow_path->GetEntryLabel());
4057
4058 UseScratchRegisterScope temps(masm);
4059 Register temp = temps.AcquireW();
4060 Register temp2 = temps.AcquireW();
4061
4062 // Check that the VarHandle references an instance field by checking that
4063 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4064 // type compatibility check with the source object's type, which will fail for null.
4065 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4066 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4067 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4068 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4069 __ Cbnz(temp2, slow_path->GetEntryLabel());
4070
4071 // Check that the object has the correct type.
4072 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4073 temps.Release(temp2); // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4074 GenerateSubTypeObjectCheckNoReadBarrier(
4075 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4076 }
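// For instance fields, the fast path thus relies on three facts checked above: the object is
// non-null, `coordinateType1` is null (so this is a field VarHandle, not an array or byte
// view), and the object is a subtype of `coordinateType0`. As the comment above notes, the
// missing read barrier can only produce false negatives, which the slow path handles.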
4077
4078 static DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
4079 size_t expected_coordinates_count) {
4080 DCHECK_EQ(expected_coordinates_count, GetExpectedVarHandleCoordinatesCount(invoke));
4081 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4082 DCHECK_GE(number_of_arguments, /* VarHandle object */ 1u + expected_coordinates_count);
4083 if (number_of_arguments == /* VarHandle object */ 1u + expected_coordinates_count) {
4084 return invoke->GetType();
4085 } else {
4086 return GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4087 }
4088 }
4089
4090 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4091 CodeGeneratorARM64* codegen,
4092 VarHandleSlowPathARM64* slow_path) {
4093 MacroAssembler* masm = codegen->GetVIXLAssembler();
4094 Register varhandle = InputRegisterAt(invoke, 0);
4095 Register object = InputRegisterAt(invoke, 1);
4096 Register index = InputRegisterAt(invoke, 2);
4097 DataType::Type value_type =
4098 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4099 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4100
4101 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4102 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4103 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4104 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4105 const MemberOffset class_offset = mirror::Object::ClassOffset();
4106 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4107
4108 // Null-check the object.
4109 __ Cbz(object, slow_path->GetEntryLabel());
4110
4111 UseScratchRegisterScope temps(masm);
4112 Register temp = temps.AcquireW();
4113 Register temp2 = temps.AcquireW();
4114
4115 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4116 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4117 // coordinateType0 shall not be null but we do not explicitly verify that.
4118 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4119 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4120 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4121 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4122 __ Cbz(temp2, slow_path->GetEntryLabel());
4123
4124 // Check object class against componentType0.
4125 //
4126 // This is an exact check and we defer other cases to the runtime. This includes
4127 // conversion to array of superclass references, which is valid but subsequently
4128 // requires all update operations to check that the value can indeed be stored.
4129 // We do not want to perform such extra checks in the intrinsified code.
4130 //
4131 // We do this check without read barrier, so there can be false negatives which we
4132 // defer to the slow path. There shall be no false negatives for array classes in the
4133 // boot image (including Object[] and primitive arrays) because they are non-movable.
4134 __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4135 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4136 __ Cmp(temp, temp2);
4137 __ B(slow_path->GetEntryLabel(), ne);
4138
4139 // Check that the coordinateType0 is an array type. We do not need a read barrier
4140 // for loading constant reference fields (or chains of them) for comparison with null,
4141 // nor for finally loading a constant primitive field (primitive type) below.
4142 __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4143 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4144 __ Cbz(temp2, slow_path->GetEntryLabel());
4145
4146 // Check that the array component type matches the primitive type.
4147 __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4148 if (primitive_type == Primitive::kPrimNot) {
4149 static_assert(Primitive::kPrimNot == 0);
4150 __ Cbnz(temp2, slow_path->GetEntryLabel());
4151 } else {
4152 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4153 // we shall check for a byte array view in the slow path.
4154 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4155 // so we cannot emit that if we're JITting without boot image.
4156 bool boot_image_available =
4157 codegen->GetCompilerOptions().IsBootImage() ||
4158 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4159 DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
4160 size_t can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
4161 vixl::aarch64::Label* slow_path_label =
4162 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4163 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4164 __ B(slow_path_label, ne);
4165 }
4166
4167 // Check for array index out of bounds.
4168 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4169 __ Cmp(index, temp);
4170 __ B(slow_path->GetEntryLabel(), hs);
4171 }
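// In total, the array/view fast path above performs: a null check on the object, a check that
// `coordinateType1` is non-null (array, byte array view or ByteBuffer handle), an exact match
// of the object's class against `coordinateType0`, a primitive-type check of the component
// type (branching to the byte-array-view slow-path label where a view is still possible), and
// finally a bounds check of the index against the array length.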
4172
4173 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4174 CodeGeneratorARM64* codegen,
4175 VarHandleSlowPathARM64* slow_path) {
4176 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4177 if (expected_coordinates_count == 0u) {
4178 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4179 } else if (expected_coordinates_count == 1u) {
4180 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4181 } else {
4182 DCHECK_EQ(expected_coordinates_count, 2u);
4183 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4184 }
4185 }
4186
4187 static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4188 CodeGeneratorARM64* codegen,
4189 std::memory_order order,
4190 DataType::Type type) {
4191 VarHandleSlowPathARM64* slow_path =
4192 new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4193 codegen->AddSlowPath(slow_path);
4194
4195 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4196 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4197
4198 return slow_path;
4199 }
4200
4201 struct VarHandleTarget {
4202 Register object; // The object holding the value to operate on.
4203 Register offset; // The offset of the value to operate on.
4204 };
4205
4206 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4207 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4208 LocationSummary* locations = invoke->GetLocations();
4209
4210 VarHandleTarget target;
4211 // The temporary allocated for loading the offset.
4212 target.offset = WRegisterFrom(locations->GetTemp(0u));
4213 // The reference to the object that holds the value to operate on.
4214 target.object = (expected_coordinates_count == 0u)
4215 ? WRegisterFrom(locations->GetTemp(1u))
4216 : InputRegisterAt(invoke, 1);
4217 return target;
4218 }
4219
4220 static void GenerateVarHandleTarget(HInvoke* invoke,
4221 const VarHandleTarget& target,
4222 CodeGeneratorARM64* codegen) {
4223 MacroAssembler* masm = codegen->GetVIXLAssembler();
4224 Register varhandle = InputRegisterAt(invoke, 0);
4225 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4226
4227 if (expected_coordinates_count <= 1u) {
4228 // For static fields, we need to fill `target.object` with the declaring class,
4229 // so we can use `target.object` as a temporary for the `ArtMethod*`. For instance fields,
4230 // we do not need the declaring class and can forget the `ArtMethod*` once we have
4231 // loaded the `target.offset`, so we use `target.offset` to hold the `ArtMethod*`.
4232 Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
4233
4234 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4235 const MemberOffset offset_offset = ArtField::OffsetOffset();
4236
4237 // Load the ArtField, the offset and, if needed, declaring class.
4238 __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4239 __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
4240 if (expected_coordinates_count == 0u) {
4241 codegen->GenerateGcRootFieldLoad(invoke,
4242 LocationFrom(target.object),
4243 method.X(),
4244 ArtField::DeclaringClassOffset().Int32Value(),
4245 /*fixup_label=*/ nullptr,
4246 kCompilerReadBarrierOption);
4247 }
4248 } else {
4249 DCHECK_EQ(expected_coordinates_count, 2u);
4250 DataType::Type value_type =
4251 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4252 size_t size_shift = DataType::SizeShift(value_type);
4253 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4254
4255 Register index = InputRegisterAt(invoke, 2);
4256 Register shifted_index = index;
4257 if (size_shift != 0u) {
4258 shifted_index = target.offset;
4259 __ Lsl(shifted_index, index, size_shift);
4260 }
4261 __ Add(target.offset, shifted_index, data_offset.Int32Value());
4262 }
4263 }
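// After this runs, the access location is always the pair (target.object, target.offset):
//   - static field:   target.object = declaring class, target.offset = field offset
//   - instance field: target.object = the coordinate,  target.offset = field offset
//   - array/view:     target.object = the array,       target.offset = data_offset + (index << size_shift)
// A sketch of the array case for a hypothetical 4-byte element (size_shift == 2):
//
//     lsl w_off, w_index, #2
//     add w_off, w_off, #data_offset
//
// (The LSL is skipped for byte-sized elements, where size_shift is 0.)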
4264
4265 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
4266 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4267 if (expected_coordinates_count > 2u) {
4268 // Invalid coordinate count. This invoke shall throw at runtime.
4269 return false;
4270 }
4271 if (expected_coordinates_count != 0u &&
4272 invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
4273 // Except for static fields (no coordinates), the first coordinate must be a reference.
4274 return false;
4275 }
4276 if (expected_coordinates_count == 2u) {
4277 // For arrays and views, the second coordinate must be convertible to `int`.
4278 // In this context, `boolean` is not convertible but we have to look at the shorty
4279 // as compiler transformations can give the invoke a valid boolean input.
4280 DataType::Type index_type = GetDataTypeFromShorty(invoke, 2);
4281 if (index_type == DataType::Type::kBool ||
4282 DataType::Kind(index_type) != DataType::Type::kInt32) {
4283 return false;
4284 }
4285 }
4286
4287 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4288 DataType::Type return_type = invoke->GetType();
4289 mirror::VarHandle::AccessModeTemplate access_mode_template =
4290 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4291 switch (access_mode_template) {
4292 case mirror::VarHandle::AccessModeTemplate::kGet:
4293 // The return type should be the same as varType, so it shouldn't be void.
4294 if (return_type == DataType::Type::kVoid) {
4295 return false;
4296 }
4297 break;
4298 case mirror::VarHandle::AccessModeTemplate::kSet:
4299 if (return_type != DataType::Type::kVoid) {
4300 return false;
4301 }
4302 break;
4303 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
4304 if (return_type != DataType::Type::kBool) {
4305 return false;
4306 }
4307 uint32_t expected_value_index = number_of_arguments - 2;
4308 uint32_t new_value_index = number_of_arguments - 1;
4309 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4310 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4311 if (expected_value_type != new_value_type) {
4312 return false;
4313 }
4314 break;
4315 }
4316 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
4317 uint32_t expected_value_index = number_of_arguments - 2;
4318 uint32_t new_value_index = number_of_arguments - 1;
4319 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4320 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
4321 if (expected_value_type != new_value_type || return_type != expected_value_type) {
4322 return false;
4323 }
4324 break;
4325 }
4326 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
4327 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
4328 if (IsVarHandleGetAndAdd(invoke) &&
4329 (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
4330 // We should only add numerical types.
4331 return false;
4332 } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
4333 // We can only apply operators to bitwise integral types.
4334 // Note that bitwise VarHandle operations accept a non-integral boolean type and
4335 // perform the appropriate logical operation. However, the result is the same as
4336 // using the bitwise operation on our boolean representation and this fits well
4337 // with DataType::IsIntegralType() treating the compiler type kBool as integral.
4338 return false;
4339 }
4340 if (value_type != return_type) {
4341 return false;
4342 }
4343 break;
4344 }
4345 }
4346
4347 return true;
4348 }
4349
4350 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4351 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4352 DataType::Type return_type = invoke->GetType();
4353
4354 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4355 LocationSummary* locations =
4356 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4357 locations->SetInAt(0, Location::RequiresRegister());
4358 // Require coordinates in registers. These are the object holding the value
4359 // to operate on (except for static fields) and index (for arrays and views).
4360 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4361 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4362 }
4363 if (return_type != DataType::Type::kVoid) {
4364 if (DataType::IsFloatingPointType(return_type)) {
4365 locations->SetOut(Location::RequiresFpuRegister());
4366 } else {
4367 locations->SetOut(Location::RequiresRegister());
4368 }
4369 }
4370 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4371 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4372 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4373 HInstruction* arg = invoke->InputAt(arg_index);
4374 if (IsConstantZeroBitPattern(arg)) {
4375 locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
4376 } else if (DataType::IsFloatingPointType(arg->GetType())) {
4377 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4378 } else {
4379 locations->SetInAt(arg_index, Location::RequiresRegister());
4380 }
4381 }
4382
4383 // Add a temporary for offset.
4384 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4385 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4386 // To preserve the offset value across the non-Baker read barrier slow path
4387 // for loading the declaring class, use a fixed callee-save register.
4388 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4389 locations->AddTemp(Location::RegisterLocation(first_callee_save));
4390 } else {
4391 locations->AddTemp(Location::RequiresRegister());
4392 }
4393 if (expected_coordinates_count == 0u) {
4394 // Add a temporary to hold the declaring class.
4395 locations->AddTemp(Location::RequiresRegister());
4396 }
4397
4398 return locations;
4399 }
4400
4401 static void CreateVarHandleGetLocations(HInvoke* invoke) {
4402 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4403 return;
4404 }
4405
4406 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4407 invoke->GetType() == DataType::Type::kReference &&
4408 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4409 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4410 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4411 // the passed reference and reloads it from the field. This gets the memory visibility
4412 // wrong for Acquire/Volatile operations. b/173104084
4413 return;
4414 }
4415
4416 CreateVarHandleCommonLocations(invoke);
4417 }
4418
4419 static void GenerateVarHandleGet(HInvoke* invoke,
4420 CodeGeneratorARM64* codegen,
4421 std::memory_order order,
4422 bool byte_swap = false) {
4423 DataType::Type type = invoke->GetType();
4424 DCHECK_NE(type, DataType::Type::kVoid);
4425
4426 LocationSummary* locations = invoke->GetLocations();
4427 MacroAssembler* masm = codegen->GetVIXLAssembler();
4428 CPURegister out = helpers::OutputCPURegister(invoke);
4429
4430 VarHandleTarget target = GetVarHandleTarget(invoke);
4431 VarHandleSlowPathARM64* slow_path = nullptr;
4432 if (!byte_swap) {
4433 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4434 GenerateVarHandleTarget(invoke, target, codegen);
4435 __ Bind(slow_path->GetNativeByteOrderLabel());
4436 }
4437
4438 // ARM64 load-acquire instructions are implicitly sequentially consistent.
4439 bool use_load_acquire =
4440 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
4441 DCHECK(use_load_acquire || order == std::memory_order_relaxed);
4442
4443 // Load the value from the target location.
4444 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4445 // Piggy-back on the field load path using introspection for the Baker read barrier.
4446 // The `target.offset` is a temporary, use it for field address.
4447 Register tmp_ptr = target.offset.X();
4448 __ Add(tmp_ptr, target.object.X(), target.offset.X());
4449 codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
4450 locations->Out(),
4451 target.object,
4452 MemOperand(tmp_ptr),
4453 /*needs_null_check=*/ false,
4454 use_load_acquire);
4455 DCHECK(!byte_swap);
4456 } else {
4457 MemOperand address(target.object.X(), target.offset.X());
4458 CPURegister load_reg = out;
4459 DataType::Type load_type = type;
4460 UseScratchRegisterScope temps(masm);
4461 if (byte_swap) {
4462 if (type == DataType::Type::kInt16) {
4463 // Avoid unnecessary sign extension before REV16.
4464 load_type = DataType::Type::kUint16;
4465 } else if (type == DataType::Type::kFloat32) {
4466 load_type = DataType::Type::kInt32;
4467 load_reg = target.offset.W();
4468 } else if (type == DataType::Type::kFloat64) {
4469 load_type = DataType::Type::kInt64;
4470 load_reg = target.offset.X();
4471 }
4472 }
4473 if (use_load_acquire) {
4474 codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
4475 } else {
4476 codegen->Load(load_type, load_reg, address);
4477 }
4478 if (type == DataType::Type::kReference) {
4479 DCHECK(!byte_swap);
4480 DCHECK(out.IsW());
4481 Location out_loc = locations->Out();
4482 Location object_loc = LocationFrom(target.object);
4483 Location offset_loc = LocationFrom(target.offset);
4484 codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
4485 } else if (byte_swap) {
4486 GenerateReverseBytes(masm, type, load_reg, out);
4487 }
4488 }
4489
4490 if (!byte_swap) {
4491 __ Bind(slow_path->GetExitLabel());
4492 }
4493 }
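// The memory order requested by the particular VarHandle.get* flavor maps onto the load
// emitted above roughly as follows (byte-swapped and read-barrier cases aside):
//   - relaxed (get / getOpaque): plain load via Load()
//   - acquire (getAcquire):      load-acquire via LoadAcquire()
//   - seq_cst (getVolatile):     also LoadAcquire(); as the comment above notes, the ARM64
//                                load-acquire is strong enough for volatile semantics.
// For reference results with Baker read barriers, the load is instead routed through
// GenerateFieldLoadWithBakerReadBarrier() using the (object + offset) address computed above.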
4494
4495 void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
4496 CreateVarHandleGetLocations(invoke);
4497 }
4498
4499 void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
4500 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4501 }
4502
4503 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4504 CreateVarHandleGetLocations(invoke);
4505 }
4506
4507 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4508 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4509 }
4510
4511 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4512 CreateVarHandleGetLocations(invoke);
4513 }
4514
4515 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4516 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
4517 }
4518
4519 void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4520 CreateVarHandleGetLocations(invoke);
4521 }
4522
4523 void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4524 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4525 }
4526
4527 static void CreateVarHandleSetLocations(HInvoke* invoke) {
4528 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4529 return;
4530 }
4531
4532 CreateVarHandleCommonLocations(invoke);
4533 }
4534
4535 static void GenerateVarHandleSet(HInvoke* invoke,
4536 CodeGeneratorARM64* codegen,
4537 std::memory_order order,
4538 bool byte_swap = false) {
4539 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4540 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4541
4542 MacroAssembler* masm = codegen->GetVIXLAssembler();
4543 CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
4544
4545 VarHandleTarget target = GetVarHandleTarget(invoke);
4546 VarHandleSlowPathARM64* slow_path = nullptr;
4547 if (!byte_swap) {
4548 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4549 GenerateVarHandleTarget(invoke, target, codegen);
4550 __ Bind(slow_path->GetNativeByteOrderLabel());
4551 }
4552
4553 // ARM64 store-release instructions are implicitly sequentially consistent.
4554 bool use_store_release =
4555 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
4556 DCHECK(use_store_release || order == std::memory_order_relaxed);
4557
4558 // Store the value to the target location.
4559 {
4560 CPURegister source = value;
4561 UseScratchRegisterScope temps(masm);
4562 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4563 DCHECK(value.IsW());
4564 Register temp = temps.AcquireW();
4565 __ Mov(temp, value.W());
4566 codegen->GetAssembler()->PoisonHeapReference(temp);
4567 source = temp;
4568 }
4569 if (byte_swap) {
4570 DCHECK(!source.IsZero()); // We use the main path for zero as it does not need a byte swap.
4571 Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
4572 if (value_type == DataType::Type::kInt16) {
4573 // Avoid unnecessary sign extension before storing.
4574 value_type = DataType::Type::kUint16;
4575 } else if (DataType::IsFloatingPointType(value_type)) {
4576 __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
4577 value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
4578 source = temp; // Source for the `GenerateReverseBytes()` below.
4579 }
4580 GenerateReverseBytes(masm, value_type, source, temp);
4581 source = temp;
4582 }
4583 MemOperand address(target.object.X(), target.offset.X());
4584 if (use_store_release) {
4585 codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
4586 } else {
4587 codegen->Store(value_type, source, address);
4588 }
4589 }
4590
4591 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4592 codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
4593 }
4594
4595 if (!byte_swap) {
4596 __ Bind(slow_path->GetExitLabel());
4597 }
4598 }
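// Mirror image of the get path: set/setOpaque use a plain store via Store(), while
// setRelease and setVolatile use StoreRelease() (a store-release, which the comment above
// notes is sufficient for volatile semantics). Reference values are poisoned first when heap
// poisoning is enabled, byte-swapped values go through a scratch register, and a GC card is
// marked after any reference store that needs a write barrier.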
4599
4600 void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
4601 CreateVarHandleSetLocations(invoke);
4602 }
4603
4604 void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
4605 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4606 }
4607
4608 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4609 CreateVarHandleSetLocations(invoke);
4610 }
4611
4612 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4613 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4614 }
4615
4616 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4617 CreateVarHandleSetLocations(invoke);
4618 }
4619
4620 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4621 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4622 }
4623
4624 void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4625 CreateVarHandleSetLocations(invoke);
4626 }
4627
4628 void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4629 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4630 }
4631
4632 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
4633 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4634 return;
4635 }
4636
4637 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4638 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4639 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4640 value_type == DataType::Type::kReference) {
4641 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4642 // the passed reference and reloads it from the field. This breaks the read barriers
4643 // in slow path in different ways. The marked old value may not actually be a to-space
4644 // reference to the same object as `old_value`, breaking slow path assumptions. And
4645 // for CompareAndExchange, marking the old value after comparison failure may actually
4646 // return the reference to `expected`, erroneously indicating success even though we
4647 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4648 return;
4649 }
4650
4651 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4652
4653 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4654 // We need callee-save registers for both the class object and offset instead of
4655 // the temporaries reserved in CreateVarHandleCommonLocations().
4656 static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
4657 uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4658 uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
4659 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4660 DCHECK_EQ(locations->GetTempCount(), 2u);
4661 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4662 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4663 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4664 } else {
4665 DCHECK_EQ(locations->GetTempCount(), 1u);
4666 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4667 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4668 }
4669 }
4670 size_t old_temp_count = locations->GetTempCount();
4671 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4672 if (!return_success) {
4673 if (DataType::IsFloatingPointType(value_type)) {
4674 // Add a temporary for old value and exclusive store result if floating point
4675 // `expected` and/or `new_value` take scratch registers.
4676 size_t available_scratch_registers =
4677 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
4678 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
4679 size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
4680 // We can reuse the declaring class (if present) and offset temporary.
4681 if (temps_needed > old_temp_count) {
4682 locations->AddRegisterTemps(temps_needed - old_temp_count);
4683 }
4684 } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
4685 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
4686 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
4687 GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4688 // Allocate a normal temporary for store result in the non-native byte order path
4689 // because scratch registers are used by the byte-swapped `expected` and `new_value`.
4690 DCHECK_EQ(old_temp_count, 1u);
4691 locations->AddTemp(Location::RequiresRegister());
4692 }
4693 }
4694 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4695 // Add a temporary for the `old_value_temp` in slow path.
4696 locations->AddTemp(Location::RequiresRegister());
4697 }
4698 }
4699
4700 static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
4701 DataType::Type type,
4702 MacroAssembler* masm,
4703 UseScratchRegisterScope* temps) {
4704 if (cpu_reg.IsS()) {
4705 DCHECK_EQ(type, DataType::Type::kFloat32);
4706 Register reg = temps->AcquireW();
4707 __ Fmov(reg, cpu_reg.S());
4708 return reg;
4709 } else if (cpu_reg.IsD()) {
4710 DCHECK_EQ(type, DataType::Type::kFloat64);
4711 Register reg = temps->AcquireX();
4712 __ Fmov(reg, cpu_reg.D());
4713 return reg;
4714 } else {
4715 return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
4716 }
4717 }
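// Floating-point expected/new values are moved to core registers with FMOV because the
// exclusive-load/store loop operates on integer registers and, as noted below, the comparison
// is a bitwise one: e.g. a CAS expecting +0.0f will not match a stored -0.0f, and a NaN only
// matches the identical NaN bit pattern.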
4718
4719 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4720 CodeGeneratorARM64* codegen,
4721 std::memory_order order,
4722 bool return_success,
4723 bool strong,
4724 bool byte_swap = false) {
4725 DCHECK(return_success || strong);
4726
4727 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4728 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4729 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4730 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4731
4732 MacroAssembler* masm = codegen->GetVIXLAssembler();
4733 LocationSummary* locations = invoke->GetLocations();
4734 CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
4735 CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
4736 CPURegister out = helpers::OutputCPURegister(invoke);
4737
4738 VarHandleTarget target = GetVarHandleTarget(invoke);
4739 VarHandleSlowPathARM64* slow_path = nullptr;
4740 if (!byte_swap) {
4741 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4742 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4743 GenerateVarHandleTarget(invoke, target, codegen);
4744 __ Bind(slow_path->GetNativeByteOrderLabel());
4745 }
4746
4747 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
4748 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4749 // Mark card for object assuming new value is stored.
4750 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
4751 codegen->MarkGCCard(target.object, new_value.W(), new_value_can_be_null);
4752 }
4753
4754 // Reuse the `offset` temporary for the pointer to the target location,
4755 // except for references that need the offset for the read barrier.
4756 UseScratchRegisterScope temps(masm);
4757 Register tmp_ptr = target.offset.X();
4758 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4759 tmp_ptr = temps.AcquireX();
4760 }
4761 __ Add(tmp_ptr, target.object.X(), target.offset.X());
4762
4763 // Move floating point values to scratch registers.
4764 // Note that float/double CAS uses bitwise comparison, rather than the operator==.
4765 Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
4766 Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
4767 bool is_fp = DataType::IsFloatingPointType(value_type);
4768 DataType::Type cas_type = is_fp
4769 ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
4770 : value_type;
4771 // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
4772 // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
4773 if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
4774 Register temp = temps.AcquireW();
4775 __ Uxth(temp, expected_reg);
4776 expected_reg = temp;
4777 cas_type = DataType::Type::kUint16;
4778 } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
4779 Register temp = temps.AcquireW();
4780 __ Uxtb(temp, expected_reg);
4781 expected_reg = temp;
4782 cas_type = DataType::Type::kUint8;
4783 }
4784
4785 if (byte_swap) {
4786 // Do the byte swap and move values to scratch registers if needed.
4787 // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
4788 DCHECK_NE(value_type, DataType::Type::kInt8);
4789 if (!expected.IsZero()) {
4790 bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
4791 Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
4792 GenerateReverseBytes(masm, cas_type, expected_reg, temp);
4793 expected_reg = temp;
4794 }
4795 if (!new_value.IsZero()) {
4796 Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
4797 GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
4798 new_value_reg = temp;
4799 }
4800 }
4801
4802 // Prepare registers for old value and the result of the exclusive store.
4803 Register old_value;
4804 Register store_result;
4805 if (return_success) {
4806 // Use the output register for both old value and exclusive store result.
4807 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
4808 store_result = out.W();
4809 } else if (DataType::IsFloatingPointType(value_type)) {
4810 // We need two temporary registers but we have already used scratch registers for
4811 // holding the expected and new value unless they are zero bit pattern (+0.0f or
4812 // +0.0). We have allocated sufficient normal temporaries to handle that.
4813 size_t next_temp = 1u;
4814 if (expected.IsZero()) {
4815 old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
4816 } else {
4817 Location temp = locations->GetTemp(next_temp);
4818 ++next_temp;
4819 old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
4820 }
4821 store_result =
4822 new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
4823 DCHECK(!old_value.Is(tmp_ptr));
4824 DCHECK(!store_result.Is(tmp_ptr));
4825 } else {
4826 // Use the output register for the old value.
4827 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
4828 // Use scratch register for the store result, except when we have used up
4829 // scratch registers for byte-swapped `expected` and `new_value`.
4830 // In that case, we have allocated a normal temporary.
4831 store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
4832 ? WRegisterFrom(locations->GetTemp(1))
4833 : temps.AcquireW();
4834 DCHECK(!store_result.Is(tmp_ptr));
4835 }
4836
4837 vixl::aarch64::Label exit_loop_label;
4838 vixl::aarch64::Label* exit_loop = &exit_loop_label;
4839 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
4840
4841 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4842 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
4843 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
4844 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4845 Register old_value_temp =
4846 WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
4847 // For strong CAS, use a scratch register for the store result in slow path.
4848 // For weak CAS, we need to check the store result, so store it in `store_result`.
4849 Register slow_path_store_result = strong ? Register() : store_result;
4850 ReadBarrierCasSlowPathARM64* rb_slow_path =
4851 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
4852 invoke,
4853 order,
4854 strong,
4855 target.object,
4856 target.offset.X(),
4857 expected_reg,
4858 new_value_reg,
4859 old_value,
4860 old_value_temp,
4861 slow_path_store_result,
4862 /*update_old_value=*/ !return_success,
4863 codegen);
4864 codegen->AddSlowPath(rb_slow_path);
4865 exit_loop = rb_slow_path->GetExitLabel();
4866 cmp_failure = rb_slow_path->GetEntryLabel();
4867 }
4868
4869 GenerateCompareAndSet(codegen,
4870 cas_type,
4871 order,
4872 strong,
4873 cmp_failure,
4874 tmp_ptr,
4875 new_value_reg,
4876 old_value,
4877 store_result,
4878 expected_reg);
4879 __ Bind(exit_loop);
4880
4881 if (return_success) {
4882 if (strong) {
4883 __ Cset(out.W(), eq);
4884 } else {
4885 // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
4886 // On failure, either the Z flag is clear or the store result is 0.
4887 // Determine the final success value with a CSEL.
4888 __ Csel(out.W(), store_result, wzr, eq);
4889 }
4890 } else if (byte_swap) {
4891 // Also handles moving to FP registers.
4892 GenerateReverseBytes(masm, value_type, old_value, out);
4893 } else if (DataType::IsFloatingPointType(value_type)) {
4894 __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
4895 } else if (value_type == DataType::Type::kInt8) {
4896 __ Sxtb(out.W(), old_value);
4897 } else if (value_type == DataType::Type::kInt16) {
4898 __ Sxth(out.W(), old_value);
4899 }
4900
4901 if (!byte_swap) {
4902 __ Bind(slow_path->GetExitLabel());
4903 }
4904 }
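// Result materialization at the end of the routine above, for reference:
//   - compareAndSet / weakCompareAndSet*: a boolean built with CSET (strong) or CSEL against
//     the exclusive-store result (weak), as explained in the inline comments.
//   - compareAndExchange*: the old value itself, byte-swapped back for non-native byte order,
//     moved back to an FP register with FMOV for float/double, or sign-extended with
//     SXTB/SXTH for the narrow integer types that were zero-extended before the loop.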
4905
4906 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4907 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4908 }
4909
4910 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4911 GenerateVarHandleCompareAndSetOrExchange(
4912 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
4913 }
4914
4915 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4916 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4917 }
4918
4919 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4920 GenerateVarHandleCompareAndSetOrExchange(
4921 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
4922 }
4923
4924 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4925 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
4926 }
4927
4928 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4929 GenerateVarHandleCompareAndSetOrExchange(
4930 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
4931 }
4932
4933 void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4934 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4935 }
4936
4937 void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4938 GenerateVarHandleCompareAndSetOrExchange(
4939 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
4940 }
4941
4942 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4943 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4944 }
4945
4946 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4947 GenerateVarHandleCompareAndSetOrExchange(
4948 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
4949 }
4950
4951 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4952 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4953 }
4954
4955 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4956 GenerateVarHandleCompareAndSetOrExchange(
4957 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
4958 }
4959
4960 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4961 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4962 }
4963
4964 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4965 GenerateVarHandleCompareAndSetOrExchange(
4966 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
4967 }
4968
4969 void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4970 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
4971 }
4972
4973 void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4974 GenerateVarHandleCompareAndSetOrExchange(
4975 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
4976 }
4977
4978 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
4979 GetAndUpdateOp get_and_update_op) {
4980 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4981 return;
4982 }
4983
4984 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4985 invoke->GetType() == DataType::Type::kReference) {
4986 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4987 // the passed reference and reloads it from the field, thus seeing the new value
4988 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
4989 return;
4990 }
4991
4992 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4993
4994 size_t old_temp_count = locations->GetTempCount();
4995 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4996 if (DataType::IsFloatingPointType(invoke->GetType())) {
4997 if (get_and_update_op == GetAndUpdateOp::kAdd) {
4998 // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
4999 locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5000 } else {
5001 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5002 // We can reuse the declaring class temporary if present.
5003 if (old_temp_count == 1u &&
5004 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5005 // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5006 locations->AddTemp(Location::RequiresRegister());
5007 }
5008 }
5009 }
5010 // For bitwise operations, we need a temporary for the byte-swap path unless the argument is
5011 // zero, which does not need a byte swap. We can reuse the declaring class temporary if present.
5012 if (old_temp_count == 1u &&
5013 (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5014 GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5015 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5016 DataType::Type value_type =
5017 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5018 if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5019 locations->AddTemp(Location::RequiresRegister());
5020 }
5021 }
5022 }
5023
5024 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5025 CodeGeneratorARM64* codegen,
5026 GetAndUpdateOp get_and_update_op,
5027 std::memory_order order,
5028 bool byte_swap = false) {
5029 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5030 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5031
5032 MacroAssembler* masm = codegen->GetVIXLAssembler();
5033 LocationSummary* locations = invoke->GetLocations();
5034 CPURegister arg = InputCPURegisterOrZeroRegAt(invoke, arg_index);
5035 CPURegister out = helpers::OutputCPURegister(invoke);
5036
5037 VarHandleTarget target = GetVarHandleTarget(invoke);
5038 VarHandleSlowPathARM64* slow_path = nullptr;
5039 if (!byte_swap) {
5040 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5041 slow_path->SetGetAndUpdateOp(get_and_update_op);
5042 GenerateVarHandleTarget(invoke, target, codegen);
5043 __ Bind(slow_path->GetNativeByteOrderLabel());
5044 }
5045
5046 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
5047 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5048 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
5049 // Mark card for object, the new value shall be stored.
5050 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5051 codegen->MarkGCCard(target.object, arg.W(), new_value_can_be_null);
5052 }
5053
5054 // Reuse the `target.offset` temporary for the pointer to the target location,
5055 // except for references that need the offset for the non-Baker read barrier.
5056 UseScratchRegisterScope temps(masm);
5057 Register tmp_ptr = target.offset.X();
5058 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
5059 value_type == DataType::Type::kReference) {
5060 tmp_ptr = temps.AcquireX();
5061 }
5062 __ Add(tmp_ptr, target.object.X(), target.offset.X());
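  // `tmp_ptr` now holds the absolute address of the field or array element that the atomic
  // update below operates on.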

  // The load/store type is never floating point.
  bool is_fp = DataType::IsFloatingPointType(value_type);
  DataType::Type load_store_type = is_fp
      ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
      : value_type;
  // Avoid sign extension in the CAS loop. Sign-extend after the loop.
  // Note: Using unsigned values yields the same value to store (we do not store higher bits).
  if (value_type == DataType::Type::kInt8) {
    load_store_type = DataType::Type::kUint8;
  } else if (value_type == DataType::Type::kInt16) {
    load_store_type = DataType::Type::kUint16;
  }

  // Prepare register for old value.
  CPURegister old_value = out;
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
    // rather than moving between core and FP registers in the loop.
    arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
    if (DataType::IsFloatingPointType(value_type) && !arg.IsZero()) {
      // GenerateGetAndUpdate() needs a scratch register internally and another scratch
      // already holds the floating-point `new_value` (unless it is the zero bit pattern
      // +0.0f or +0.0), so keep `old_value` in the normal temporary allocated for this case.
      old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
    } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
               value_type == DataType::Type::kReference) {
      // Load the old value initially to a scratch register.
      // We shall move it to `out` later with a read barrier.
      old_value = temps.AcquireW();
    }
  }

  if (byte_swap) {
    DCHECK_NE(value_type, DataType::Type::kReference);
    DCHECK_NE(DataType::Size(value_type), 1u);
    if (get_and_update_op == GetAndUpdateOp::kAdd) {
      // We need to do the byte swapping in the CAS loop for GetAndAdd.
      get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
    } else if (!arg.IsZero()) {
      // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
      // For GetAndSet use a scratch register; the FP argument is already in a scratch register.
      // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
      // we have allocated a normal temporary to handle that.
      CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
          ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
          : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
      GenerateReverseBytes(masm, load_store_type, arg, temp);
      arg = temp;
    }
  }

  GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
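  // `old_value` now holds the value loaded from the target location before the update.
  // The code below adjusts it to the declared result type where needed (sign-extension,
  // byte swap, move to an FP register, or a read barrier for references).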

  if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
    // The only adjustment needed is sign-extension for `kInt16`.
    // Everything else has been done by the `GenerateGetAndUpdate()`.
    DCHECK(byte_swap);
    if (value_type == DataType::Type::kInt16) {
      DCHECK_EQ(load_store_type, DataType::Type::kUint16);
      __ Sxth(out.W(), old_value.W());
    }
  } else if (byte_swap) {
    // Also handles moving to FP registers.
    GenerateReverseBytes(masm, value_type, old_value, out);
  } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
    __ Fmov(out.D(), old_value.X());
  } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
    __ Fmov(out.S(), old_value.W());
  } else if (value_type == DataType::Type::kInt8) {
    __ Sxtb(out.W(), old_value.W());
  } else if (value_type == DataType::Type::kInt16) {
    __ Sxth(out.W(), old_value.W());
  } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    if (kUseBakerReadBarrier) {
      codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
    } else {
      codegen->GenerateReadBarrierSlow(
          invoke,
          Location::RegisterLocation(out.GetCode()),
          Location::RegisterLocation(old_value.GetCode()),
          Location::RegisterLocation(target.object.GetCode()),
          /*offset=*/ 0u,
          /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
    }
  }

  if (!byte_swap) {
    __ Bind(slow_path->GetExitLabel());
  }
}

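// The visitors below map each VarHandle get-and-update access mode to the helpers above.
// The plain forms use std::memory_order_seq_cst, while the *Acquire and *Release forms use
// acquire and release ordering, respectively.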
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}

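// Slow-path code for VarHandle intrinsics operating on a byte array view. It checks that the
// VarHandle is actually a ByteArrayViewVarHandle, performs the bounds, alignment and byte
// order checks, and then either returns to the native byte order fast path or emits the
// access with byte swapping.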
void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
  CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
  Register varhandle = InputRegisterAt(invoke, 0);
  Register object = InputRegisterAt(invoke, 1);
  Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireW();
    Register temp2 = temps.AcquireW();

    // The main path checked that the coordinateType0 is an array class that matches
    // the class of the actual coordinate argument, but its component type does not match
    // the value type. Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(GetEntryLabel(), ne);

    // Check for array index out of bounds.
    __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
    __ Subs(temp, temp, index);
    __ Ccmp(temp, size, NoFlag, hs);  // If SUBS yields LO (C=false), keep the C flag clear.
    __ B(GetEntryLabel(), lo);
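    // The SUBS/CCMP pair above checks `index + size <= length` (unsigned) without an explicit
    // addition that could overflow: SUBS computes `length - index` and clears the carry when
    // `index` exceeds `length`; CCMP then compares the difference with `size` only if the carry
    // was set and otherwise forces the LO condition, so any out-of-bounds access branches to
    // the slow path.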

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
    if (size == 2u) {
      __ Tbnz(target.offset, 0, GetEntryLabel());
    } else {
      __ Tst(target.offset, size - 1u);
      __ B(GetEntryLabel(), ne);
    }

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
        IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
      // There is no reason to differentiate between native byte order and byte-swap
      // for setting a zero bit pattern. Just return to the main path.
      __ B(GetNativeByteOrderLabel());
      return;
    }
    __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cbnz(temp, GetNativeByteOrderLabel());
  }

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

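// The intrinsics listed below have no ARM64 implementation; such calls are not intrinsified
// and go through the regular invoke path.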
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)

UNREACHABLE_INTRINSICS(ARM64)

#undef __

}  // namespace arm64
}  // namespace art