1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86_64.h"
18
19 #include <limits>
20
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86_64.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsic_objects.h"
29 #include "intrinsics_utils.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/object_array-inl.h"
33 #include "mirror/reference.h"
34 #include "mirror/string.h"
35 #include "scoped_thread_state_change-inl.h"
36 #include "thread-current-inl.h"
37 #include "utils/x86_64/assembler_x86_64.h"
38 #include "utils/x86_64/constants_x86_64.h"
39 #include "well_known_classes.h"
40
41 namespace art HIDDEN {
42
43 namespace x86_64 {
44
45 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
46 : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
47 }
48
49 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
50 return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
51 }
52
53 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
54 return codegen_->GetGraph()->GetAllocator();
55 }
56
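// Returns true if `invoke` was recognized as an intrinsic and a location summary
// was built for it, so the caller can skip the default invoke location setup.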
57 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
58 Dispatch(invoke);
59 LocationSummary* res = invoke->GetLocations();
60 if (res == nullptr) {
61 return false;
62 }
63 return res->Intrinsified();
64 }
65
66 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
67
68 #define __ assembler->
69
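// Computes `dest = base + data_offset + pos * DataType::Size(type)`, where `pos`
// may be either an int constant or a register.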
70 static void GenArrayAddress(X86_64Assembler* assembler,
71 CpuRegister dest,
72 CpuRegister base,
73 Location pos,
74 DataType::Type type,
75 uint32_t data_offset) {
76 // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
77 if (pos.IsConstant()) {
78 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
79 __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
80 } else {
81 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
82 __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
83 }
84 }
85
86 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
87 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
88 public:
89 explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
90 : SlowPathCode(instruction) {
91 }
92
93 void EmitNativeCode(CodeGenerator* codegen) override {
94 DCHECK(codegen->EmitBakerReadBarrier());
95 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
96 X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
97 LocationSummary* locations = instruction_->GetLocations();
98 DCHECK(locations->CanCall());
99 DCHECK(instruction_->IsInvokeStaticOrDirect())
100 << "Unexpected instruction in read barrier arraycopy slow path: "
101 << instruction_->DebugName();
102 DCHECK(instruction_->GetLocations()->Intrinsified());
103 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
104 Location length = locations->InAt(4);
105
106 const DataType::Type type = DataType::Type::kReference;
107 const int32_t element_size = DataType::Size(type);
108
109 CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
110 CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
111 CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
112
113 __ Bind(GetEntryLabel());
114 // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
115 GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
116
117 NearLabel loop;
118 __ Bind(&loop);
119 __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
120 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
121 // TODO: Inline the mark bit check before calling the runtime?
122 // TMP = ReadBarrier::Mark(TMP);
123 // No need to save live registers; it's taken care of by the
124 // entrypoint. Also, there is no need to update the stack mask,
125 // as this runtime call will not trigger a garbage collection.
126 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
127 // This runtime call does not require a stack map.
128 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
129 __ MaybePoisonHeapReference(CpuRegister(TMP));
130 __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
131 __ addl(src_curr_addr, Immediate(element_size));
132 __ addl(dst_curr_addr, Immediate(element_size));
133 __ cmpl(src_curr_addr, src_stop_addr);
134 __ j(kNotEqual, &loop);
135 __ jmp(GetExitLabel());
136 }
137
138 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
139
140 private:
141 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
142 };
143
144 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
145 LocationSummary* locations =
146 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
147 locations->SetInAt(0, Location::RequiresFpuRegister());
148 locations->SetOut(Location::RequiresRegister());
149 }
150
151 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
152 LocationSummary* locations =
153 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
154 locations->SetInAt(0, Location::RequiresRegister());
155 locations->SetOut(Location::RequiresFpuRegister());
156 }
157
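// The raw-bits intrinsics below are plain bit casts between an XMM register and a
// general-purpose register, implemented with MOVD/MOVQ.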
158 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
159 Location input = locations->InAt(0);
160 Location output = locations->Out();
161 __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
162 }
163
164 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
165 Location input = locations->InAt(0);
166 Location output = locations->Out();
167 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
168 }
169
170 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
171 CreateFPToIntLocations(allocator_, invoke);
172 }
173 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
174 CreateIntToFPLocations(allocator_, invoke);
175 }
176
177 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
178 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
179 }
180 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
181 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
182 }
183
184 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
185 CreateFPToIntLocations(allocator_, invoke);
186 }
187 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
188 CreateIntToFPLocations(allocator_, invoke);
189 }
190
191 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
192 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
193 }
194 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
195 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
196 }
197
198 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
199 LocationSummary* locations =
200 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
201 locations->SetInAt(0, Location::RequiresRegister());
202 locations->SetOut(Location::SameAsFirstInput());
203 }
204
205 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
206 CreateIntToIntLocations(allocator_, invoke);
207 }
208
209 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
210 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
211 }
212
213 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
214 CreateIntToIntLocations(allocator_, invoke);
215 }
216
217 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
218 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
219 }
220
221 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
222 CreateIntToIntLocations(allocator_, invoke);
223 }
224
225 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
226 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
227 }
228
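// Float/Double.isInfinite: the output is 1 iff the input compares equal to +infinity
// or -infinity. COMISS/COMISD report an unordered (NaN) operand with ZF=1 and PF=1,
// so the parity jumps below filter out NaN before the result is set.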
229 static void GenIsInfinite(LocationSummary* locations,
230 bool is64bit,
231 CodeGeneratorX86_64* codegen) {
232 X86_64Assembler* assembler = codegen->GetAssembler();
233
234 XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
235 CpuRegister output = locations->Out().AsRegister<CpuRegister>();
236
237 NearLabel done1, done2;
238
239 if (is64bit) {
240 double kPositiveInfinity = std::numeric_limits<double>::infinity();
241 double kNegativeInfinity = -1 * kPositiveInfinity;
242
243 __ xorq(output, output);
244 __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
245 __ j(kNotEqual, &done1);
246 __ j(kParityEven, &done2);
247 __ movq(output, Immediate(1));
248 __ jmp(&done2);
249 __ Bind(&done1);
250 __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
251 __ j(kNotEqual, &done2);
252 __ j(kParityEven, &done2);
253 __ movq(output, Immediate(1));
254 __ Bind(&done2);
255 } else {
256 float kPositiveInfinity = std::numeric_limits<float>::infinity();
257 float kNegativeInfinity = -1 * kPositiveInfinity;
258
259 __ xorl(output, output);
260 __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
261 __ j(kNotEqual, &done1);
262 __ j(kParityEven, &done2);
263 __ movl(output, Immediate(1));
264 __ jmp(&done2);
265 __ Bind(&done1);
266 __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
267 __ j(kNotEqual, &done2);
268 __ j(kParityEven, &done2);
269 __ movl(output, Immediate(1));
270 __ Bind(&done2);
271 }
272 }
273
274 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
275 CreateFPToIntLocations(allocator_, invoke);
276 }
277
278 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
279 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ false, codegen_);
280 }
281
282 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
283 CreateFPToIntLocations(allocator_, invoke);
284 }
285
286 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
287 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ true, codegen_);
288 }
289
290 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
291 LocationSummary* locations =
292 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
293 locations->SetInAt(0, Location::RequiresFpuRegister());
294 locations->SetOut(Location::RequiresFpuRegister());
295 }
296
297 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
298 CreateFPToFPLocations(allocator_, invoke);
299 }
300
301 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
302 LocationSummary* locations = invoke->GetLocations();
303 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
304 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
305
306 GetAssembler()->sqrtsd(out, in);
307 }
308
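// ROUNDSS/ROUNDSD require SSE4.1; without it no locations are created and the call
// is left as a regular invoke.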
309 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
310 HInvoke* invoke,
311 CodeGeneratorX86_64* codegen) {
312 // Do we have instruction support?
313 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
314 return;
315 }
316
317 CreateFPToFPLocations(allocator, invoke);
318 }
319
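// `round_mode` is the ROUNDSD immediate: 0 rounds to nearest (rint), 1 rounds toward
// negative infinity (floor), 2 rounds toward positive infinity (ceil).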
320 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
321 LocationSummary* locations = invoke->GetLocations();
322 DCHECK(!locations->WillCall());
323 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
324 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
325 __ roundsd(out, in, Immediate(round_mode));
326 }
327
328 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
329 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
330 }
331
332 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
333 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
334 }
335
336 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
337 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
338 }
339
340 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
341 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
342 }
343
344 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
345 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
346 }
347
348 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
349 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
350 }
351
352 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
353 HInvoke* invoke,
354 CodeGeneratorX86_64* codegen) {
355 // Do we have instruction support?
356 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
357 return;
358 }
359
360 LocationSummary* locations =
361 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
362 locations->SetInAt(0, Location::RequiresFpuRegister());
363 locations->SetOut(Location::RequiresRegister());
364 locations->AddTemp(Location::RequiresFpuRegister());
365 locations->AddTemp(Location::RequiresFpuRegister());
366 }
367
368 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
369 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
370 }
371
372 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
373 LocationSummary* locations = invoke->GetLocations();
374 DCHECK(!locations->WillCall());
375
376 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
377 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
378 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
379 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
380 NearLabel skip_incr, done;
381 X86_64Assembler* assembler = GetAssembler();
382
383 // Since no direct x86 rounding instruction matches the required semantics,
384 // this intrinsic is implemented as follows:
385 // result = floor(in);
386 // if (in - result >= 0.5f)
387 // result = result + 1.0f;
388 __ movss(t2, in);
389 __ roundss(t1, in, Immediate(1));
390 __ subss(t2, t1);
391 __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
392 __ j(kBelow, &skip_incr);
393 __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
394 __ Bind(&skip_incr);
395
396 // Final conversion to an integer. Unfortunately this also does not have a
397 // direct x86 instruction, since NaN should map to 0 and large positive
398 // values need to be clipped to the extreme value.
399 codegen_->Load32BitValue(out, kPrimIntMax);
400 __ cvtsi2ss(t2, out);
401 __ comiss(t1, t2);
402 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
403 __ movl(out, Immediate(0)); // does not change flags
404 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
405 __ cvttss2si(out, t1);
406 __ Bind(&done);
407 }
408
409 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
410 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
411 }
412
413 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
414 LocationSummary* locations = invoke->GetLocations();
415 DCHECK(!locations->WillCall());
416
417 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
418 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
419 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
420 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
421 NearLabel skip_incr, done;
422 X86_64Assembler* assembler = GetAssembler();
423
424 // Since no direct x86 rounding instruction matches the required semantics,
425 // this intrinsic is implemented as follows:
426 // result = floor(in);
427 // if (in - result >= 0.5)
428 // result = result + 1.0f;
429 __ movsd(t2, in);
430 __ roundsd(t1, in, Immediate(1));
431 __ subsd(t2, t1);
432 __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
433 __ j(kBelow, &skip_incr);
434 __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
435 __ Bind(&skip_incr);
436
437 // Final conversion to an integer. Unfortunately this also does not have a
438 // direct x86 instruction, since NaN should map to 0 and large positive
439 // values need to be clipped to the extreme value.
440 codegen_->Load64BitValue(out, kPrimLongMax);
441 __ cvtsi2sd(t2, out, /* is64bit= */ true);
442 __ comisd(t1, t2);
443 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
444 __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
445 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
446 __ cvttsd2si(out, t1, /* is64bit= */ true);
447 __ Bind(&done);
448 }
449
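// The transcendental Math intrinsics below are not inlined: they call the quick
// runtime entrypoints, taking arguments in the FP calling-convention registers and
// returning the result in XMM0.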
450 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
451 LocationSummary* locations =
452 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
453 InvokeRuntimeCallingConvention calling_convention;
454 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
455 locations->SetOut(Location::FpuRegisterLocation(XMM0));
456
457 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
458 }
459
460 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
461 QuickEntrypointEnum entry) {
462 LocationSummary* locations = invoke->GetLocations();
463 DCHECK(locations->WillCall());
464 DCHECK(invoke->IsInvokeStaticOrDirect());
465
466 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
467 }
468
469 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
470 CreateFPToFPCallLocations(allocator_, invoke);
471 }
472
473 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
474 GenFPToFPCall(invoke, codegen_, kQuickCos);
475 }
476
477 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
478 CreateFPToFPCallLocations(allocator_, invoke);
479 }
480
481 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
482 GenFPToFPCall(invoke, codegen_, kQuickSin);
483 }
484
485 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
486 CreateFPToFPCallLocations(allocator_, invoke);
487 }
488
489 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
490 GenFPToFPCall(invoke, codegen_, kQuickAcos);
491 }
492
493 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
494 CreateFPToFPCallLocations(allocator_, invoke);
495 }
496
497 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
498 GenFPToFPCall(invoke, codegen_, kQuickAsin);
499 }
500
501 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
502 CreateFPToFPCallLocations(allocator_, invoke);
503 }
504
505 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
506 GenFPToFPCall(invoke, codegen_, kQuickAtan);
507 }
508
509 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
510 CreateFPToFPCallLocations(allocator_, invoke);
511 }
512
513 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
514 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
515 }
516
517 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
518 CreateFPToFPCallLocations(allocator_, invoke);
519 }
520
521 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
522 GenFPToFPCall(invoke, codegen_, kQuickCosh);
523 }
524
525 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
526 CreateFPToFPCallLocations(allocator_, invoke);
527 }
528
529 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
530 GenFPToFPCall(invoke, codegen_, kQuickExp);
531 }
532
533 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
534 CreateFPToFPCallLocations(allocator_, invoke);
535 }
536
537 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
538 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
539 }
540
541 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
542 CreateFPToFPCallLocations(allocator_, invoke);
543 }
544
545 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
546 GenFPToFPCall(invoke, codegen_, kQuickLog);
547 }
548
549 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
550 CreateFPToFPCallLocations(allocator_, invoke);
551 }
552
553 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
554 GenFPToFPCall(invoke, codegen_, kQuickLog10);
555 }
556
557 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
558 CreateFPToFPCallLocations(allocator_, invoke);
559 }
560
561 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
562 GenFPToFPCall(invoke, codegen_, kQuickSinh);
563 }
564
565 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
566 CreateFPToFPCallLocations(allocator_, invoke);
567 }
568
569 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
570 GenFPToFPCall(invoke, codegen_, kQuickTan);
571 }
572
573 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
574 CreateFPToFPCallLocations(allocator_, invoke);
575 }
576
577 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
578 GenFPToFPCall(invoke, codegen_, kQuickTanh);
579 }
580
581 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
582 LocationSummary* locations =
583 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
584 InvokeRuntimeCallingConvention calling_convention;
585 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
586 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
587 locations->SetOut(Location::FpuRegisterLocation(XMM0));
588
589 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
590 }
591
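// Locations for three-FP-argument intrinsics that compute in place: all inputs in
// FP registers, with the output sharing the first input's register.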
592 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
593 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
594 LocationSummary* locations =
595 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
596 InvokeRuntimeCallingConvention calling_convention;
597 locations->SetInAt(0, Location::RequiresFpuRegister());
598 locations->SetInAt(1, Location::RequiresFpuRegister());
599 locations->SetInAt(2, Location::RequiresFpuRegister());
600 locations->SetOut(Location::SameAsFirstInput());
601 }
602
603 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
604 CreateFPFPToFPCallLocations(allocator_, invoke);
605 }
606
607 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
608 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
609 }
610
611 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
612 CreateFPFPToFPCallLocations(allocator_, invoke);
613 }
614
615 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
616 GenFPToFPCall(invoke, codegen_, kQuickPow);
617 }
618
619 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
620 CreateFPFPToFPCallLocations(allocator_, invoke);
621 }
622
623 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
624 GenFPToFPCall(invoke, codegen_, kQuickHypot);
625 }
626
627 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
628 CreateFPFPToFPCallLocations(allocator_, invoke);
629 }
630
631 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
632 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
633 }
634
635 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
636 // Check to see if we have known failures that will cause us to have to bail out
637 // to the runtime, and just generate the runtime call directly.
638 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
639 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
640
641 // The positions must be non-negative.
642 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
643 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
644 // We will have to fail anyways.
645 return;
646 }
647
648 // The length must be >= 0.
649 HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
650 if (length != nullptr) {
651 int32_t len = length->GetValue();
652 if (len < 0) {
653 // Just call as normal.
654 return;
655 }
656 }
657 LocationSummary* locations =
658 new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
659 (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
660 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
661 locations->SetInAt(0, Location::RequiresRegister());
662 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
663 locations->SetInAt(2, Location::RequiresRegister());
664 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
665 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
666
667 // And we need some temporaries. We will use REP MOVS{B,W,L}, so we need fixed registers.
668 locations->AddTemp(Location::RegisterLocation(RSI));
669 locations->AddTemp(Location::RegisterLocation(RDI));
670 locations->AddTemp(Location::RegisterLocation(RCX));
671 }
672
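// Emits `cmpl lhs, rhs` followed by a jump-if-less to `label`; `rhs` may be either
// an int32 constant or a register.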
673 template <typename LhsType>
674 static void EmitCmplJLess(X86_64Assembler* assembler,
675 LhsType lhs,
676 Location rhs,
677 Label* label) {
678 static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
679 if (rhs.IsConstant()) {
680 int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
681 __ cmpl(lhs, Immediate(rhs_constant));
682 } else {
683 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
684 }
685 __ j(kLess, label);
686 }
687
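// Emits the range checks for one side of System.arraycopy: unless already known to
// hold, verifies that `pos >= 0` and that `length(array) - pos >= length`, branching
// to `slow_path` when a check fails.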
688 static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
689 CpuRegister array,
690 Location pos,
691 Location length,
692 SlowPathCode* slow_path,
693 CpuRegister temp,
694 bool length_is_array_length,
695 bool position_sign_checked) {
696 // Where is the length in the Array?
697 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
698
699 if (pos.IsConstant()) {
700 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
701 if (pos_const == 0) {
702 if (!length_is_array_length) {
703 // Check that length(array) >= length.
704 EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
705 }
706 } else {
707 // Calculate length(array) - pos.
708 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
709 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
710 __ movl(temp, Address(array, length_offset));
711 __ subl(temp, Immediate(pos_const));
712
713 // Check that (length(array) - pos) >= length.
714 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
715 }
716 } else if (length_is_array_length) {
717 // The only way the copy can succeed is if pos is zero.
718 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
719 __ testl(pos_reg, pos_reg);
720 __ j(kNotEqual, slow_path->GetEntryLabel());
721 } else {
722 // Check that pos >= 0.
723 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
724 if (!position_sign_checked) {
725 __ testl(pos_reg, pos_reg);
726 __ j(kLess, slow_path->GetEntryLabel());
727 }
728
729 // Calculate length(array) - pos.
730 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
731 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
732 __ movl(temp, Address(array, length_offset));
733 __ subl(temp, pos_reg);
734
735 // Check that (length(array) - pos) >= length.
736 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
737 }
738 }
739
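// Shared code for the byte/char/int System.arraycopy intrinsics: performs the null,
// aliasing and range checks, then copies with REP MOVSB/MOVSW/MOVSL using the fixed
// RSI/RDI/RCX temporaries.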
740 static void SystemArrayCopyPrimitive(HInvoke* invoke,
741 X86_64Assembler* assembler,
742 CodeGeneratorX86_64* codegen,
743 DataType::Type type) {
744 LocationSummary* locations = invoke->GetLocations();
745 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
746 Location src_pos = locations->InAt(1);
747 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
748 Location dest_pos = locations->InAt(3);
749 Location length = locations->InAt(4);
750
751 // Temporaries that we need for MOVSB/W/L.
752 CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
753 DCHECK_EQ(src_base.AsRegister(), RSI);
754 CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
755 DCHECK_EQ(dest_base.AsRegister(), RDI);
756 CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
757 DCHECK_EQ(count.AsRegister(), RCX);
758
759 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
760 codegen->AddSlowPath(slow_path);
761
762 // Bail out if the source and destination are the same.
763 __ cmpl(src, dest);
764 __ j(kEqual, slow_path->GetEntryLabel());
765
766 // Bail out if the source is null.
767 __ testl(src, src);
768 __ j(kEqual, slow_path->GetEntryLabel());
769
770 // Bail out if the destination is null.
771 __ testl(dest, dest);
772 __ j(kEqual, slow_path->GetEntryLabel());
773
774 // If the length is negative, bail out.
775 // We have already checked in the LocationsBuilder for the constant case.
776 if (!length.IsConstant()) {
777 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
778 __ j(kLess, slow_path->GetEntryLabel());
779 }
780
781 // Validity checks: source. Use src_base as a temporary register.
782 CheckSystemArrayCopyPosition(assembler,
783 src,
784 src_pos,
785 length,
786 slow_path,
787 src_base,
788 /*length_is_array_length=*/ false,
789 /*position_sign_checked=*/ false);
790
791 // Validity checks: dest. Use src_base as a temporary register.
792 CheckSystemArrayCopyPosition(assembler,
793 dest,
794 dest_pos,
795 length,
796 slow_path,
797 src_base,
798 /*length_is_array_length=*/ false,
799 /*position_sign_checked=*/ false);
800
801 // We need the count in RCX.
802 if (length.IsConstant()) {
803 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
804 } else {
805 __ movl(count, length.AsRegister<CpuRegister>());
806 }
807
808 // Okay, everything checks out. Finally time to do the copy.
809 // The data offset and address scaling below are derived from the element size of the copied type.
810 const size_t data_size = DataType::Size(type);
811 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
812
813 GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
814 GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
815
816 // Do the move.
817 switch (type) {
818 case DataType::Type::kInt8:
819 __ rep_movsb();
820 break;
821 case DataType::Type::kUint16:
822 __ rep_movsw();
823 break;
824 case DataType::Type::kInt32:
825 __ rep_movsl();
826 break;
827 default:
828 LOG(FATAL) << "Unexpected data type for intrinsic";
829 }
830 __ Bind(slow_path->GetExitLabel());
831 }
832
833 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
834 CreateSystemArrayCopyLocations(invoke);
835 }
836 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
837 X86_64Assembler* assembler = GetAssembler();
838 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
839 }
840
841 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
842 X86_64Assembler* assembler = GetAssembler();
843 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
844 }
845
846 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
847 CreateSystemArrayCopyLocations(invoke);
848 }
849
850 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
851 X86_64Assembler* assembler = GetAssembler();
852 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
853 }
854
855 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
856 CreateSystemArrayCopyLocations(invoke);
857 }
858
859 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
860 // The only read barrier implementation supporting the
861 // SystemArrayCopy intrinsic is the Baker-style read barriers.
862 if (codegen_->EmitNonBakerReadBarrier()) {
863 return;
864 }
865
866 constexpr int32_t kLengthThreshold = -1; // No cut-off - handle large arrays in intrinsic code.
867 constexpr size_t kInitialNumTemps = 0u; // We shall allocate temps explicitly.
868 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
869 invoke, kLengthThreshold, kInitialNumTemps);
870 if (locations != nullptr) {
871 // Add temporaries. We will use REP MOVSL, so we need fixed registers.
872 DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
873 locations->AddTemp(Location::RegisterLocation(RSI));
874 locations->AddTemp(Location::RegisterLocation(RDI));
875 locations->AddTemp(Location::RegisterLocation(RCX));
876 }
877 }
878
879 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
880 // The only read barrier implementation supporting the
881 // SystemArrayCopy intrinsic is the Baker-style read barriers.
882 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
883
884 X86_64Assembler* assembler = GetAssembler();
885 LocationSummary* locations = invoke->GetLocations();
886
887 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
888 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
889 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
890 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
891 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
892
893 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
894 Location src_pos = locations->InAt(1);
895 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
896 Location dest_pos = locations->InAt(3);
897 Location length = locations->InAt(4);
898 Location temp1_loc = locations->GetTemp(0);
899 CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
900 Location temp2_loc = locations->GetTemp(1);
901 CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
902 Location temp3_loc = locations->GetTemp(2);
903 CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
904
905 SlowPathCode* intrinsic_slow_path =
906 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
907 codegen_->AddSlowPath(intrinsic_slow_path);
908
909 NearLabel conditions_on_positions_validated;
910 SystemArrayCopyOptimizations optimizations(invoke);
911
912 // If source and destination are the same, we go to slow path if we need to do forward copying.
913 // We do not need to do this check if the source and destination positions are the same.
914 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
915 if (src_pos.IsConstant()) {
916 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
917 if (dest_pos.IsConstant()) {
918 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
919 if (optimizations.GetDestinationIsSource()) {
920 // Checked when building locations.
921 DCHECK_GE(src_pos_constant, dest_pos_constant);
922 } else if (src_pos_constant < dest_pos_constant) {
923 __ cmpl(src, dest);
924 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
925 }
926 } else {
927 if (!optimizations.GetDestinationIsSource()) {
928 __ cmpl(src, dest);
929 __ j(kNotEqual, &conditions_on_positions_validated);
930 }
931 __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
932 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
933 }
934 } else {
935 if (!optimizations.GetDestinationIsSource()) {
936 __ cmpl(src, dest);
937 __ j(kNotEqual, &conditions_on_positions_validated);
938 }
939 CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
940 EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
941 }
942 }
943
944 __ Bind(&conditions_on_positions_validated);
945
946 if (!optimizations.GetSourceIsNotNull()) {
947 // Bail out if the source is null.
948 __ testl(src, src);
949 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
950 }
951
952 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
953 // Bail out if the destination is null.
954 __ testl(dest, dest);
955 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
956 }
957
958 // If the length is negative, bail out.
959 // We have already checked in the LocationsBuilder for the constant case.
960 if (!length.IsConstant() &&
961 !optimizations.GetCountIsSourceLength() &&
962 !optimizations.GetCountIsDestinationLength()) {
963 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
964 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
965 }
966
967 // Validity checks: source.
968 CheckSystemArrayCopyPosition(assembler,
969 src,
970 src_pos,
971 length,
972 intrinsic_slow_path,
973 temp1,
974 optimizations.GetCountIsSourceLength(),
975 /*position_sign_checked=*/ false);
976
977 // Validity checks: dest.
978 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
979 CheckSystemArrayCopyPosition(assembler,
980 dest,
981 dest_pos,
982 length,
983 intrinsic_slow_path,
984 temp1,
985 optimizations.GetCountIsDestinationLength(),
986 dest_position_sign_checked);
987
988 auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
989 // No read barrier is needed for reading a chain of constant references for comparing
990 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
991 // /* HeapReference<Class> */ temp = klass->component_type_
992 __ movl(temp, Address(klass, component_offset));
993 __ MaybeUnpoisonHeapReference(temp);
994 // Check that the component type is not null.
995 __ testl(temp, temp);
996 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
997 // Check that the component type is not a primitive.
998 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
999 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1000 };
1001
1002 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1003 // Check whether all elements of the source array are assignable to the component
1004 // type of the destination array. We do two checks: the classes are the same,
1005 // or the destination is Object[]. If none of these checks succeed, we go to the
1006 // slow path.
1007
1008 if (codegen_->EmitBakerReadBarrier()) {
1009 // /* HeapReference<Class> */ temp1 = dest->klass_
1010 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1011 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1012 // Register `temp1` is not trashed by the read barrier emitted
1013 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1014 // method produces a call to a ReadBarrierMarkRegX entry point,
1015 // which saves all potentially live registers, including
1016 // temporaries such as `temp1`.
1017 // /* HeapReference<Class> */ temp2 = src->klass_
1018 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1019 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1020 // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
1021 // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
1022 } else {
1023 // /* HeapReference<Class> */ temp1 = dest->klass_
1024 __ movl(temp1, Address(dest, class_offset));
1025 __ MaybeUnpoisonHeapReference(temp1);
1026 // /* HeapReference<Class> */ temp2 = src->klass_
1027 __ movl(temp2, Address(src, class_offset));
1028 __ MaybeUnpoisonHeapReference(temp2);
1029 }
1030
1031 __ cmpl(temp1, temp2);
1032 if (optimizations.GetDestinationIsTypedObjectArray()) {
1033 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1034 NearLabel do_copy;
1035 // For class match, we can skip the source type check regardless of the optimization flag.
1036 __ j(kEqual, &do_copy);
1037 // No read barrier is needed for reading a chain of constant references
1038 // for comparing with null, see `ReadBarrierOption`.
1039 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1040 __ movl(temp1, Address(temp1, component_offset));
1041 __ MaybeUnpoisonHeapReference(temp1);
1042 // No need to unpoison the following heap reference load, as
1043 // we're comparing against null.
1044 __ cmpl(Address(temp1, super_offset), Immediate(0));
1045 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1046 // Bail out if the source is not a non primitive array.
1047 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1048 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1049 }
1050 __ Bind(&do_copy);
1051 } else {
1052 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1053 // For class match, we can skip the array type check completely if at least one of source
1054 // and destination is known to be a non primitive array, otherwise one check is enough.
1055 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1056 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1057 !optimizations.GetSourceIsNonPrimitiveArray()) {
1058 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1059 }
1060 }
1061 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1062 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1063 // Bail out if the source is not a non primitive array.
1064 // No read barrier is needed for reading a chain of constant references for comparing
1065 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1066 // /* HeapReference<Class> */ temp1 = src->klass_
1067 __ movl(temp1, Address(src, class_offset));
1068 __ MaybeUnpoisonHeapReference(temp1);
1069 check_non_primitive_array_class(temp1, CpuRegister(TMP));
1070 }
1071
1072 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1073 // Constant zero length: no need to emit the loop code at all.
1074 } else {
1075 const DataType::Type type = DataType::Type::kReference;
1076 const int32_t element_size = DataType::Size(type);
1077 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1078
1079 // Don't enter copy loop if `length == 0`.
1080 NearLabel skip_copy_and_write_barrier;
1081 if (!length.IsConstant()) {
1082 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1083 __ j(kEqual, &skip_copy_and_write_barrier);
1084 }
1085
1086 // Compute base source address, base destination address, and end
1087 // source address in `temp1`, `temp2` and `temp3` respectively.
1088 GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
1089 GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
1090
1091 SlowPathCode* read_barrier_slow_path = nullptr;
1092 if (codegen_->EmitBakerReadBarrier()) {
1093 // SystemArrayCopy implementation for Baker read barriers (see
1094 // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1095 //
1096 // if (src_ptr != end_ptr) {
1097 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1098 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1099 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1100 // if (is_gray) {
1101 // // Slow-path copy.
1102 // do {
1103 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1104 // } while (src_ptr != end_ptr)
1105 // } else {
1106 // // Fast-path copy.
1107 // do {
1108 // *dest_ptr++ = *src_ptr++;
1109 // } while (src_ptr != end_ptr)
1110 // }
1111 // }
1112
1113 // Given the numeric representation, it's enough to check the low bit of the rb_state.
1114 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1115 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1116 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1117 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1118 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1119
1120 // if (rb_state == ReadBarrier::GrayState())
1121 // goto slow_path;
1122 // At this point, just do the "if" and make sure that flags are preserved until the branch.
1123 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1124
1125 // Load fence to prevent load-load reordering.
1126 // Note that this is a no-op, thanks to the x86-64 memory model.
1127 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1128
1129 // Slow path used to copy array when `src` is gray.
1130 read_barrier_slow_path =
1131 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1132 codegen_->AddSlowPath(read_barrier_slow_path);
1133
1134 // We have done the "if" of the gray bit check above, now branch based on the flags.
1135 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1136 }
1137
1138 if (length.IsConstant()) {
1139 __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1140 } else {
1141 __ movl(temp3, length.AsRegister<CpuRegister>());
1142 }
1143
1144 // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
1145 DCHECK_EQ(temp1.AsRegister(), RSI);
1146 DCHECK_EQ(temp2.AsRegister(), RDI);
1147 DCHECK_EQ(temp3.AsRegister(), RCX);
1148 __ rep_movsl();
1149
1150 if (read_barrier_slow_path != nullptr) {
1151 DCHECK(codegen_->EmitBakerReadBarrier());
1152 __ Bind(read_barrier_slow_path->GetExitLabel());
1153 }
1154
1155 // We only need one card marking on the destination array.
1156 codegen_->MarkGCCard(temp1, temp2, dest);
1157
1158 __ Bind(&skip_copy_and_write_barrier);
1159 }
1160
1161 __ Bind(intrinsic_slow_path->GetExitLabel());
1162 }
1163
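// String.compareTo is not inlined: the intrinsic only adds an explicit null check for
// the argument and then calls the quick runtime entrypoint.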
1164 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1165 LocationSummary* locations = new (allocator_) LocationSummary(
1166 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1167 InvokeRuntimeCallingConvention calling_convention;
1168 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1169 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1170 locations->SetOut(Location::RegisterLocation(RAX));
1171 }
1172
1173 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1174 X86_64Assembler* assembler = GetAssembler();
1175 LocationSummary* locations = invoke->GetLocations();
1176
1177 // Note that the null check must have been done earlier.
1178 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1179
1180 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1181 __ testl(argument, argument);
1182 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1183 codegen_->AddSlowPath(slow_path);
1184 __ j(kEqual, slow_path->GetEntryLabel());
1185
1186 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1187 __ Bind(slow_path->GetExitLabel());
1188 }
1189
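// String.equals: after the null, class and length/compression-flag checks, the string
// contents are compared 8 bytes at a time with REPE CMPSQ, which is why RSI, RDI and
// RCX are requested as fixed registers.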
1190 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1191 LocationSummary* locations =
1192 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1193 locations->SetInAt(0, Location::RequiresRegister());
1194 locations->SetInAt(1, Location::RequiresRegister());
1195
1196 // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
1197 locations->AddTemp(Location::RegisterLocation(RCX));
1198 locations->AddTemp(Location::RegisterLocation(RDI));
1199
1200 // Set output, RSI needed for repe_cmpsq instruction anyways.
1201 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1202 }
1203
1204 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1205 X86_64Assembler* assembler = GetAssembler();
1206 LocationSummary* locations = invoke->GetLocations();
1207
1208 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1209 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1210 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1211 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1212 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1213
1214 NearLabel end, return_true, return_false;
1215
1216 // Get offsets of count, value, and class fields within a string object.
1217 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1218 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1219 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1220
1221 // Note that the null check must have been done earlier.
1222 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1223
1224 StringEqualsOptimizations optimizations(invoke);
1225 if (!optimizations.GetArgumentNotNull()) {
1226 // Check if input is null, return false if it is.
1227 __ testl(arg, arg);
1228 __ j(kEqual, &return_false);
1229 }
1230
1231 if (!optimizations.GetArgumentIsString()) {
1232 // Instanceof check for the argument by comparing class fields.
1233 // All string objects must have the same type since String cannot be subclassed.
1234 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1235 // If the argument is a string object, its class field must be equal to receiver's class field.
1236 //
1237 // As the String class is expected to be non-movable, we can read the class
1238 // field from String.equals' arguments without read barriers.
1239 AssertNonMovableStringClass();
1240 // Also, because we use the loaded class references only to compare them, we
1241 // don't need to unpoison them.
1242 // /* HeapReference<Class> */ rcx = str->klass_
1243 __ movl(rcx, Address(str, class_offset));
1244 // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1245 __ cmpl(rcx, Address(arg, class_offset));
1246 __ j(kNotEqual, &return_false);
1247 }
1248
1249 // Reference equality check, return true if same reference.
1250 __ cmpl(str, arg);
1251 __ j(kEqual, &return_true);
1252
1253 // Load length and compression flag of receiver string.
1254 __ movl(rcx, Address(str, count_offset));
1255 // Check if lengths and compression flags are equal; return false if they're not.
1256 // Two identical strings will always have same compression style since
1257 // compression style is decided on alloc.
1258 __ cmpl(rcx, Address(arg, count_offset));
1259 __ j(kNotEqual, &return_false);
1260 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1261 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1262 "Expecting 0=compressed, 1=uncompressed");
1263 __ jrcxz(&return_true);
1264
1265 if (mirror::kUseStringCompression) {
1266 NearLabel string_uncompressed;
1267 // Extract the length and handle the case where both strings are compressed or both
1268 // are uncompressed; strings with different compression styles were rejected above.
1269 __ shrl(rcx, Immediate(1));
1270 __ j(kCarrySet, &string_uncompressed);
1271 // Divide string length by 2, rounding up, and continue as if uncompressed.
1272 // Merge clearing the compression flag with +1 for rounding.
1273 __ addl(rcx, Immediate(1));
1274 __ shrl(rcx, Immediate(1));
1275 __ Bind(&string_uncompressed);
1276 }
1277 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1278 __ leal(rsi, Address(str, value_offset));
1279 __ leal(rdi, Address(arg, value_offset));
1280
1281 // Divide string length by 4 and adjust for lengths not divisible by 4.
1282 __ addl(rcx, Immediate(3));
1283 __ shrl(rcx, Immediate(2));
1284
1285 // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1286 // or 8 characters (compressed) at a time.
1287 DCHECK_ALIGNED(value_offset, 8);
1288 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1289
1290 // Loop comparing the strings 8 bytes (4 uncompressed or 8 compressed chars) at a time.
1291 __ repe_cmpsq();
1292 // If strings are not equal, zero flag will be cleared.
1293 __ j(kNotEqual, &return_false);
1294
1295 // Return true and exit the function.
1296 // If loop does not result in returning false, we return true.
1297 __ Bind(&return_true);
1298 __ movl(rsi, Immediate(1));
1299 __ jmp(&end);
1300
1301 // Return false and exit the function.
1302 __ Bind(&return_false);
1303 __ xorl(rsi, rsi);
1304 __ Bind(&end);
1305 }
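
// Illustrative sketch (hypothetical helper, not used by the generated code): the arithmetic
// that the code above performs on the String `count` field, assuming the layout asserted
// earlier (character count in the upper bits, compression flag in bit 0, 0 = compressed,
// 1 = uncompressed). It returns the number of 8-byte words compared by REPE CMPSQ.
[[maybe_unused]] static uint32_t NumWordsComparedByStringEquals(uint32_t count) {
  uint32_t length = count >> 1;  // Strip the compression flag, as the first SHRL does.
  if ((count & 1u) == 0u) {
    // Compressed: 8-bit characters. Halve the length, rounding up, to get an
    // equivalent 16-bit character count (the ADDL/SHRL pair above).
    length = (length + 1u) >> 1;
  }
  // One 8-byte word, i.e. four 16-bit characters, per comparison step (ADDL 3 / SHRL 2).
  return (length + 3u) >> 2;
}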
1306
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1307 static void CreateStringIndexOfLocations(HInvoke* invoke,
1308 ArenaAllocator* allocator,
1309 bool start_at_zero) {
1310 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1311 LocationSummary::kCallOnSlowPath,
1312 kIntrinsified);
1313 // The data needs to be in RDI for scasw, so request that the string be placed there.
1314 locations->SetInAt(0, Location::RegisterLocation(RDI));
1315 // Even if we look for a constant char, it still has to be copied into RAX, so simply ask the
1316 // allocator to place it there. The constant check can still be done by inspecting the
1317 // instruction's input explicitly.
1318 // Note: This works because we don't clobber RAX anywhere.
1319 locations->SetInAt(1, Location::RegisterLocation(RAX));
1320 if (!start_at_zero) {
1321 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1322 }
1323 // As we clobber RDI during execution anyway, also use it as the output.
1324 locations->SetOut(Location::SameAsFirstInput());
1325
1326 // repne scasw uses RCX as the counter.
1327 locations->AddTemp(Location::RegisterLocation(RCX));
1328 // Need another temporary to be able to compute the result.
1329 locations->AddTemp(Location::RequiresRegister());
1330 }
1331
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1332 static void GenerateStringIndexOf(HInvoke* invoke,
1333 X86_64Assembler* assembler,
1334 CodeGeneratorX86_64* codegen,
1335 bool start_at_zero) {
1336 LocationSummary* locations = invoke->GetLocations();
1337
1338 // Note that the null check must have been done earlier.
1339 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1340
1341 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1342 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1343 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1344 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1345 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1346
1347 // Check our assumptions for registers.
1348 DCHECK_EQ(string_obj.AsRegister(), RDI);
1349 DCHECK_EQ(search_value.AsRegister(), RAX);
1350 DCHECK_EQ(counter.AsRegister(), RCX);
1351 DCHECK_EQ(out.AsRegister(), RDI);
1352
1353 // Check for code points > 0xFFFF: a slow-path check when the value is not known statically,
1354 // a direct branch to the slow path for a large constant, or no slow path for a small constant or a char.
1355 SlowPathCode* slow_path = nullptr;
1356 HInstruction* code_point = invoke->InputAt(1);
1357 if (code_point->IsIntConstant()) {
1358 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1359 std::numeric_limits<uint16_t>::max()) {
1360 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1361 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1362 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1363 codegen->AddSlowPath(slow_path);
1364 __ jmp(slow_path->GetEntryLabel());
1365 __ Bind(slow_path->GetExitLabel());
1366 return;
1367 }
1368 } else if (code_point->GetType() != DataType::Type::kUint16) {
1369 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1370 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1371 codegen->AddSlowPath(slow_path);
1372 __ j(kAbove, slow_path->GetEntryLabel());
1373 }
1374
1375 // From here down, we know that we are looking for a char that fits in
1376 // 16 bits (uncompressed) or 8 bits (compressed).
1377 // Location of reference to data array within the String object.
1378 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1379 // Location of count within the String object.
1380 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1381
1382 // Load the count field of the string containing the length and compression flag.
1383 __ movl(string_length, Address(string_obj, count_offset));
1384
1385 // Do a zero-length check. Even with string compression `count == 0` means empty.
1386 // TODO: Support jecxz.
1387 NearLabel not_found_label;
1388 __ testl(string_length, string_length);
1389 __ j(kEqual, &not_found_label);
1390
1391 if (mirror::kUseStringCompression) {
1392 // Use TMP to keep string_length_flagged.
1393 __ movl(CpuRegister(TMP), string_length);
1394 // Shift out the compression flag (bit 0), leaving the character count.
1395 __ shrl(string_length, Immediate(1));
1396 }
1397
1398 if (start_at_zero) {
1399 // Number of chars to scan is the same as the string length.
1400 __ movl(counter, string_length);
1401 // Move to the start of the string.
1402 __ addq(string_obj, Immediate(value_offset));
1403 } else {
1404 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1405
1406 // Do a start_index check.
1407 __ cmpl(start_index, string_length);
1408 __ j(kGreaterEqual, &not_found_label);
1409
1410 // Ensure we have a start index >= 0.
1411 __ xorl(counter, counter);
1412 __ cmpl(start_index, Immediate(0));
1413 __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
1414
1415 if (mirror::kUseStringCompression) {
1416 NearLabel modify_counter, offset_uncompressed_label;
1417 __ testl(CpuRegister(TMP), Immediate(1));
1418 __ j(kNotZero, &offset_uncompressed_label);
1419 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1420 __ jmp(&modify_counter);
1421 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1422 __ Bind(&offset_uncompressed_label);
1423 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1424 __ Bind(&modify_counter);
1425 } else {
1426 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1427 }
1428 // Now update RCX, the work counter: it becomes string.length - start_index.
1429 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1430 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1431 }
1432
1433 if (mirror::kUseStringCompression) {
1434 NearLabel uncompressed_string_comparison;
1435 NearLabel comparison_done;
1436 __ testl(CpuRegister(TMP), Immediate(1));
1437 __ j(kNotZero, &uncompressed_string_comparison);
1438 // Check if RAX (search_value) is ASCII.
1439 __ cmpl(search_value, Immediate(127));
1440 __ j(kGreater, &not_found_label);
1441 // Comparing byte-per-byte.
1442 __ repne_scasb();
1443 __ jmp(&comparison_done);
1444 // Everything is set up for repne scasw:
1445 // * Comparison address in RDI.
1446 // * Counter in ECX.
1447 __ Bind(&uncompressed_string_comparison);
1448 __ repne_scasw();
1449 __ Bind(&comparison_done);
1450 } else {
1451 __ repne_scasw();
1452 }
1453 // Did we find a match?
1454 __ j(kNotEqual, &not_found_label);
1455
1456 // Yes, we matched. Compute the index of the result.
1457 __ subl(string_length, counter);
1458 __ leal(out, Address(string_length, -1));
1459
1460 NearLabel done;
1461 __ jmp(&done);
1462
1463 // Failed to match; return -1.
1464 __ Bind(&not_found_label);
1465 __ movl(out, Immediate(-1));
1466
1467 // And join up at the end.
1468 __ Bind(&done);
1469 if (slow_path != nullptr) {
1470 __ Bind(slow_path->GetExitLabel());
1471 }
1472 }
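
// Illustrative sketch (hypothetical helper, not used by the generated code): the semantics
// of the REPNE SCASW fast path above for an uncompressed string, written as plain C++.
// The compressed path additionally bails out to not_found for search values above 0x7F
// and scans bytes instead; code points above 0xFFFF are left to the slow path.
[[maybe_unused]] static int32_t ReferenceStringIndexOf(const uint16_t* chars,
                                                       int32_t length,
                                                       uint16_t search_value,
                                                       int32_t from_index) {
  if (from_index >= length) {
    return -1;  // Mirrors the start_index >= string_length check above.
  }
  if (from_index < 0) {
    from_index = 0;  // Mirrors the CMOV that clamps the start index to zero.
  }
  for (int32_t i = from_index; i < length; ++i) {
    if (chars[i] == search_value) {
      return i;  // Matches string_length - remaining_count - 1 computed above.
    }
  }
  return -1;
}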
1473
VisitStringIndexOf(HInvoke * invoke)1474 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1475 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1476 }
1477
VisitStringIndexOf(HInvoke * invoke)1478 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1479 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1480 }
1481
VisitStringIndexOfAfter(HInvoke * invoke)1482 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1483 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1484 }
1485
VisitStringIndexOfAfter(HInvoke * invoke)1486 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1487 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1488 }
1489
VisitStringNewStringFromBytes(HInvoke * invoke)1490 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1491 LocationSummary* locations = new (allocator_) LocationSummary(
1492 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1493 InvokeRuntimeCallingConvention calling_convention;
1494 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1495 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1496 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1497 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1498 locations->SetOut(Location::RegisterLocation(RAX));
1499 }
1500
VisitStringNewStringFromBytes(HInvoke * invoke)1501 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1502 X86_64Assembler* assembler = GetAssembler();
1503 LocationSummary* locations = invoke->GetLocations();
1504
1505 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1506 __ testl(byte_array, byte_array);
1507 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1508 codegen_->AddSlowPath(slow_path);
1509 __ j(kEqual, slow_path->GetEntryLabel());
1510
1511 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1512 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1513 __ Bind(slow_path->GetExitLabel());
1514 }
1515
VisitStringNewStringFromChars(HInvoke * invoke)1516 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1517 LocationSummary* locations =
1518 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1519 InvokeRuntimeCallingConvention calling_convention;
1520 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1521 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1522 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1523 locations->SetOut(Location::RegisterLocation(RAX));
1524 }
1525
VisitStringNewStringFromChars(HInvoke * invoke)1526 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1527 // No need to emit code checking whether `locations->InAt(2)` is a null
1528 // pointer, as callers of the native method
1529 //
1530 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1531 //
1532 // all include a null check on `data` before calling that method.
1533 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1534 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1535 }
1536
VisitStringNewStringFromString(HInvoke * invoke)1537 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1538 LocationSummary* locations = new (allocator_) LocationSummary(
1539 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1540 InvokeRuntimeCallingConvention calling_convention;
1541 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1542 locations->SetOut(Location::RegisterLocation(RAX));
1543 }
1544
VisitStringNewStringFromString(HInvoke * invoke)1545 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1546 X86_64Assembler* assembler = GetAssembler();
1547 LocationSummary* locations = invoke->GetLocations();
1548
1549 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1550 __ testl(string_to_copy, string_to_copy);
1551 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1552 codegen_->AddSlowPath(slow_path);
1553 __ j(kEqual, slow_path->GetEntryLabel());
1554
1555 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1556 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1557 __ Bind(slow_path->GetExitLabel());
1558 }
1559
VisitStringGetCharsNoCheck(HInvoke * invoke)1560 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1561 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1562 LocationSummary* locations =
1563 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1564 locations->SetInAt(0, Location::RequiresRegister());
1565 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1566 locations->SetInAt(2, Location::RequiresRegister());
1567 locations->SetInAt(3, Location::RequiresRegister());
1568 locations->SetInAt(4, Location::RequiresRegister());
1569
1570 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1571 locations->AddTemp(Location::RegisterLocation(RSI));
1572 locations->AddTemp(Location::RegisterLocation(RDI));
1573 locations->AddTemp(Location::RegisterLocation(RCX));
1574 }
1575
VisitStringGetCharsNoCheck(HInvoke * invoke)1576 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1577 X86_64Assembler* assembler = GetAssembler();
1578 LocationSummary* locations = invoke->GetLocations();
1579
1580 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1581 // Location of data in char array buffer.
1582 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1583 // Location of char array data in string.
1584 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1585
1586 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1587 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1588 Location srcBegin = locations->InAt(1);
1589 int srcBegin_value =
1590 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1591 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1592 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1593 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1594
1595 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1596 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1597 DCHECK_EQ(char_size, 2u);
1598
1599 NearLabel done;
1600 // Compute the number of chars (words) to move.
1601 __ movl(CpuRegister(RCX), srcEnd);
1602 if (srcBegin.IsConstant()) {
1603 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1604 } else {
1605 DCHECK(srcBegin.IsRegister());
1606 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1607 }
1608 if (mirror::kUseStringCompression) {
1609 NearLabel copy_uncompressed, copy_loop;
1610 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1611 DCHECK_EQ(c_char_size, 1u);
1612 // Location of count in string.
1613 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1614
1615 __ testl(Address(obj, count_offset), Immediate(1));
1616 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1617 "Expecting 0=compressed, 1=uncompressed");
1618 __ j(kNotZero, &copy_uncompressed);
1619 // Compute the address of the source string by adding the number of chars from
1620 // the source beginning to the value offset of a string.
1621 __ leaq(CpuRegister(RSI),
1622 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1623 // Compute the address of the destination buffer in the char array.
1624 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1625
1626 __ Bind(&copy_loop);
1627 __ jrcxz(&done);
1628 // Use TMP as temporary (convert byte from RSI to word).
1629 // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1630 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1631 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1632 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1633 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1634 // TODO: Add support for LOOP to X86_64Assembler.
1635 __ subl(CpuRegister(RCX), Immediate(1));
1636 __ jmp(&copy_loop);
1637
1638 __ Bind(&copy_uncompressed);
1639 }
1640
1641 __ leaq(CpuRegister(RSI),
1642 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1643 // Compute the address of the destination buffer.
1644 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1645 // Do the move.
1646 __ rep_movsw();
1647
1648 __ Bind(&done);
1649 }
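
// Illustrative sketch (hypothetical helper, not used by the generated code): the widening
// copy performed by the MOVZXB/MOVW loop above when the source string is compressed. Each
// Latin-1 byte is zero-extended to a 16-bit char; the uncompressed case is a plain
// REP MOVSW copy instead.
[[maybe_unused]] static void ReferenceGetCharsCompressed(const uint8_t* src,
                                                         uint16_t* dst,
                                                         int32_t char_count) {
  for (int32_t i = 0; i < char_count; ++i) {
    dst[i] = src[i];  // Zero-extend one byte to one char, as MOVZXB + MOVW do.
  }
}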
1650
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1651 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1652 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1653 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1654 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1655 // to avoid a SIGBUS.
1656 switch (size) {
1657 case DataType::Type::kInt8:
1658 __ movsxb(out, Address(address, 0));
1659 break;
1660 case DataType::Type::kInt16:
1661 __ movsxw(out, Address(address, 0));
1662 break;
1663 case DataType::Type::kInt32:
1664 __ movl(out, Address(address, 0));
1665 break;
1666 case DataType::Type::kInt64:
1667 __ movq(out, Address(address, 0));
1668 break;
1669 default:
1670 LOG(FATAL) << "Type not recognized for peek: " << size;
1671 UNREACHABLE();
1672 }
1673 }
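
// Illustrative sketch (hypothetical helper, not used by the generated code): the 64-bit
// peek above relies on x86-64 tolerating unaligned loads, so a single MOVQ suffices.
// Assembling the value byte by byte, as below, yields the same little-endian result
// without any alignment assumption.
[[maybe_unused]] static int64_t ReferencePeekLong(const uint8_t* address) {
  uint64_t value = 0u;
  for (size_t i = 0; i < sizeof(value); ++i) {
    value |= static_cast<uint64_t>(address[i]) << (8u * i);  // Little-endian byte order.
  }
  return static_cast<int64_t>(value);
}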
1674
VisitMemoryPeekByte(HInvoke * invoke)1675 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1676 CreateIntToIntLocations(allocator_, invoke);
1677 }
1678
VisitMemoryPeekByte(HInvoke * invoke)1679 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1680 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1681 }
1682
VisitMemoryPeekIntNative(HInvoke * invoke)1683 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1684 CreateIntToIntLocations(allocator_, invoke);
1685 }
1686
VisitMemoryPeekIntNative(HInvoke * invoke)1687 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1688 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1689 }
1690
VisitMemoryPeekLongNative(HInvoke * invoke)1691 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1692 CreateIntToIntLocations(allocator_, invoke);
1693 }
1694
VisitMemoryPeekLongNative(HInvoke * invoke)1695 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1696 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1697 }
1698
VisitMemoryPeekShortNative(HInvoke * invoke)1699 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1700 CreateIntToIntLocations(allocator_, invoke);
1701 }
1702
VisitMemoryPeekShortNative(HInvoke * invoke)1703 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1704 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1705 }
1706
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1707 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1708 LocationSummary* locations =
1709 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1710 locations->SetInAt(0, Location::RequiresRegister());
1711 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1712 }
1713
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1714 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1715 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1716 Location value = locations->InAt(1);
1717 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1718 // to avoid a SIGBUS.
1719 switch (size) {
1720 case DataType::Type::kInt8:
1721 if (value.IsConstant()) {
1722 __ movb(Address(address, 0),
1723 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1724 } else {
1725 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1726 }
1727 break;
1728 case DataType::Type::kInt16:
1729 if (value.IsConstant()) {
1730 __ movw(Address(address, 0),
1731 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1732 } else {
1733 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1734 }
1735 break;
1736 case DataType::Type::kInt32:
1737 if (value.IsConstant()) {
1738 __ movl(Address(address, 0),
1739 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1740 } else {
1741 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1742 }
1743 break;
1744 case DataType::Type::kInt64:
1745 if (value.IsConstant()) {
1746 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1747 DCHECK(IsInt<32>(v));
1748 int32_t v_32 = v;
1749 __ movq(Address(address, 0), Immediate(v_32));
1750 } else {
1751 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1752 }
1753 break;
1754 default:
1755 LOG(FATAL) << "Type not recognized for poke: " << size;
1756 UNREACHABLE();
1757 }
1758 }
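
// Illustrative sketch (hypothetical helper, not used by the generated code): MOVQ to memory
// only accepts a 32-bit immediate that the CPU sign-extends to 64 bits, which is why the
// constant path above checks IsInt<32>(v) before truncating. The same range check written
// with std::numeric_limits:
[[maybe_unused]] static bool FitsInSignExtended32BitImmediate(int64_t value) {
  return value >= std::numeric_limits<int32_t>::min() &&
         value <= std::numeric_limits<int32_t>::max();
}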
1759
VisitMemoryPokeByte(HInvoke * invoke)1760 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1761 CreateIntIntToVoidLocations(allocator_, invoke);
1762 }
1763
VisitMemoryPokeByte(HInvoke * invoke)1764 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1765 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1766 }
1767
VisitMemoryPokeIntNative(HInvoke * invoke)1768 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1769 CreateIntIntToVoidLocations(allocator_, invoke);
1770 }
1771
VisitMemoryPokeIntNative(HInvoke * invoke)1772 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1773 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1774 }
1775
VisitMemoryPokeLongNative(HInvoke * invoke)1776 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1777 CreateIntIntToVoidLocations(allocator_, invoke);
1778 }
1779
VisitMemoryPokeLongNative(HInvoke * invoke)1780 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1781 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1782 }
1783
VisitMemoryPokeShortNative(HInvoke * invoke)1784 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1785 CreateIntIntToVoidLocations(allocator_, invoke);
1786 }
1787
VisitMemoryPokeShortNative(HInvoke * invoke)1788 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1789 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1790 }
1791
VisitThreadCurrentThread(HInvoke * invoke)1792 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1793 LocationSummary* locations =
1794 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1795 locations->SetOut(Location::RequiresRegister());
1796 }
1797
VisitThreadCurrentThread(HInvoke * invoke)1798 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1799 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1800 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1801 /* no_rip= */ true));
1802 }
1803
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1804 static void GenUnsafeGet(HInvoke* invoke,
1805 DataType::Type type,
1806 [[maybe_unused]] bool is_volatile,
1807 CodeGeneratorX86_64* codegen) {
1808 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1809 LocationSummary* locations = invoke->GetLocations();
1810 Location base_loc = locations->InAt(1);
1811 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1812 Location offset_loc = locations->InAt(2);
1813 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1814 Location output_loc = locations->Out();
1815 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1816
1817 switch (type) {
1818 case DataType::Type::kInt8:
1819 __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1820 break;
1821
1822 case DataType::Type::kInt32:
1823 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1824 break;
1825
1826 case DataType::Type::kReference: {
1827 if (codegen->EmitReadBarrier()) {
1828 if (kUseBakerReadBarrier) {
1829 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1830 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1831 invoke, output_loc, base, src, /* needs_null_check= */ false);
1832 } else {
1833 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1834 codegen->GenerateReadBarrierSlow(
1835 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1836 }
1837 } else {
1838 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1839 __ MaybeUnpoisonHeapReference(output);
1840 }
1841 break;
1842 }
1843
1844 case DataType::Type::kInt64:
1845 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1846 break;
1847
1848 default:
1849 LOG(FATAL) << "Unsupported op size " << type;
1850 UNREACHABLE();
1851 }
1852 }
1853
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1854 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1855 HInvoke* invoke,
1856 CodeGeneratorX86_64* codegen) {
1857 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1858 LocationSummary* locations =
1859 new (allocator) LocationSummary(invoke,
1860 can_call
1861 ? LocationSummary::kCallOnSlowPath
1862 : LocationSummary::kNoCall,
1863 kIntrinsified);
1864 if (can_call && kUseBakerReadBarrier) {
1865 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1866 }
1867 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1868 locations->SetInAt(1, Location::RequiresRegister());
1869 locations->SetInAt(2, Location::RequiresRegister());
1870 locations->SetOut(Location::RequiresRegister(),
1871 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1872 }
1873
VisitUnsafeGet(HInvoke * invoke)1874 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1875 VisitJdkUnsafeGet(invoke);
1876 }
VisitUnsafeGetVolatile(HInvoke * invoke)1877 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1878 VisitJdkUnsafeGetVolatile(invoke);
1879 }
VisitUnsafeGetLong(HInvoke * invoke)1880 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1881 VisitJdkUnsafeGetLong(invoke);
1882 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1883 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1884 VisitJdkUnsafeGetLongVolatile(invoke);
1885 }
VisitUnsafeGetObject(HInvoke * invoke)1886 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1887 VisitJdkUnsafeGetReference(invoke);
1888 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1889 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1890 VisitJdkUnsafeGetReferenceVolatile(invoke);
1891 }
VisitUnsafeGetByte(HInvoke * invoke)1892 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1893 VisitJdkUnsafeGetByte(invoke);
1894 }
1895
VisitJdkUnsafeGet(HInvoke * invoke)1896 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1897 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1898 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1899 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1900 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1901 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1902 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1903 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1904 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1905 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1906 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1907 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1908 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1909 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1910 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1911 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1912 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1913 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1914 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1915 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1916 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1917 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1918 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1919 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1920 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1921 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1922 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1923 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1924 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1925 }
1926
VisitUnsafeGet(HInvoke * invoke)1927 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1928 VisitJdkUnsafeGet(invoke);
1929 }
VisitUnsafeGetVolatile(HInvoke * invoke)1930 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1931 VisitJdkUnsafeGetVolatile(invoke);
1932 }
VisitUnsafeGetLong(HInvoke * invoke)1933 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1934 VisitJdkUnsafeGetLong(invoke);
1935 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1936 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1937 VisitJdkUnsafeGetLongVolatile(invoke);
1938 }
VisitUnsafeGetObject(HInvoke * invoke)1939 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1940 VisitJdkUnsafeGetReference(invoke);
1941 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1942 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1943 VisitJdkUnsafeGetReferenceVolatile(invoke);
1944 }
VisitUnsafeGetByte(HInvoke * invoke)1945 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1946 VisitJdkUnsafeGetByte(invoke);
1947 }
1948
VisitJdkUnsafeGet(HInvoke * invoke)1949 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1950 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1951 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1952 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1953 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1954 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1955 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1956 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1957 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1958 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1959 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1960 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1961 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1962 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1963 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1964 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1965 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1966 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1967 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1968 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1969 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1970 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1971 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1972 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1973 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1974 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1975 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1976 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1977 GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
1978 }
1979
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1980 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1981 DataType::Type type,
1982 HInvoke* invoke) {
1983 LocationSummary* locations =
1984 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1985 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1986 locations->SetInAt(1, Location::RequiresRegister());
1987 locations->SetInAt(2, Location::RequiresRegister());
1988 locations->SetInAt(3, Location::RequiresRegister());
1989 if (type == DataType::Type::kReference) {
1990 // Need temp registers for card-marking.
1991 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1992 locations->AddTemp(Location::RequiresRegister());
1993 }
1994 }
1995
VisitUnsafePut(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
1997 VisitJdkUnsafePut(invoke);
1998 }
VisitUnsafePutOrdered(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2000 VisitJdkUnsafePutOrdered(invoke);
2001 }
VisitUnsafePutVolatile(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2003 VisitJdkUnsafePutVolatile(invoke);
2004 }
VisitUnsafePutObject(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2006 VisitJdkUnsafePutReference(invoke);
2007 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2009 VisitJdkUnsafePutObjectOrdered(invoke);
2010 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2012 VisitJdkUnsafePutReferenceVolatile(invoke);
2013 }
VisitUnsafePutLong(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2015 VisitJdkUnsafePutLong(invoke);
2016 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2018 VisitJdkUnsafePutLongOrdered(invoke);
2019 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2021 VisitJdkUnsafePutLongVolatile(invoke);
2022 }
VisitUnsafePutByte(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2024 VisitJdkUnsafePut(invoke);
2025 }
2026
VisitJdkUnsafePut(HInvoke * invoke)2027 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2028 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2029 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2030 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2031 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2032 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2033 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2034 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2035 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2036 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2037 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2038 }
VisitJdkUnsafePutReference(HInvoke * invoke)2039 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2040 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2041 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2042 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2043 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2044 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2045 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2046 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2047 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2048 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2049 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2050 }
VisitJdkUnsafePutLong(HInvoke * invoke)2051 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2052 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2053 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2054 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2055 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2056 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2057 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2058 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2059 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2060 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2061 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2062 }
VisitJdkUnsafePutByte(HInvoke * invoke)2063 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2064 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2065 }
2066
2067 // Ordered puts need no special handling: they require an AnyStore barrier, which the x86
2068 // memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2069 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2070 CodeGeneratorX86_64* codegen) {
2071 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2072 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2073 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2074 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2075
2076 if (type == DataType::Type::kInt64) {
2077 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2078 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2079 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2080 __ movl(temp, value);
2081 __ PoisonHeapReference(temp);
2082 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2083 } else {
2084 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2085 }
2086
2087 if (is_volatile) {
2088 codegen->MemoryFence();
2089 }
2090
2091 if (type == DataType::Type::kReference) {
2092 bool value_can_be_null = true; // TODO: Worth finding out this information?
2093 codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2094 locations->GetTemp(1).AsRegister<CpuRegister>(),
2095 base,
2096 value,
2097 value_can_be_null);
2098 }
2099 }
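
// Illustrative sketch (hypothetical helper, not used by the generated code): the barrier
// policy applied above under the x86-64 (TSO) memory model. Stores are never reordered
// with earlier stores, so plain and ordered puts need no extra code; only a volatile put
// needs the trailing StoreLoad barrier emitted by codegen->MemoryFence().
[[maybe_unused]] static bool UnsafePutNeedsTrailingFence(bool is_volatile) {
  return is_volatile;  // Release ordering already comes for free with TSO stores.
}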
2100
VisitUnsafePut(HInvoke * invoke)2101 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2102 VisitJdkUnsafePut(invoke);
2103 }
VisitUnsafePutOrdered(HInvoke * invoke)2104 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2105 VisitJdkUnsafePutOrdered(invoke);
2106 }
VisitUnsafePutVolatile(HInvoke * invoke)2107 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2108 VisitJdkUnsafePutVolatile(invoke);
2109 }
VisitUnsafePutObject(HInvoke * invoke)2110 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2111 VisitJdkUnsafePutReference(invoke);
2112 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2113 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2114 VisitJdkUnsafePutObjectOrdered(invoke);
2115 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2116 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2117 VisitJdkUnsafePutReferenceVolatile(invoke);
2118 }
VisitUnsafePutLong(HInvoke * invoke)2119 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2120 VisitJdkUnsafePutLong(invoke);
2121 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2122 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2123 VisitJdkUnsafePutLongOrdered(invoke);
2124 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2125 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2126 VisitJdkUnsafePutLongVolatile(invoke);
2127 }
VisitUnsafePutByte(HInvoke * invoke)2128 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2129 VisitJdkUnsafePutByte(invoke);
2130 }
2131
VisitJdkUnsafePut(HInvoke * invoke)2132 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2133 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2134 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2135 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2136 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2137 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2138 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2139 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2140 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2141 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2142 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2143 }
VisitJdkUnsafePutReference(HInvoke * invoke)2144 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2145 GenUnsafePut(
2146 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2147 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2148 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2149 GenUnsafePut(
2150 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2151 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2152 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2153 GenUnsafePut(
2154 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2155 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2156 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2157 GenUnsafePut(
2158 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2159 }
VisitJdkUnsafePutLong(HInvoke * invoke)2160 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2161 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2162 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2163 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2164 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2165 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2166 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2167 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2168 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2169 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2170 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2171 }
VisitJdkUnsafePutByte(HInvoke * invoke)2172 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2173 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2174 }
2175
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)2176 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2177 HInvoke* invoke,
2178 CodeGeneratorX86_64* codegen,
2179 DataType::Type type) {
2180 const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2181 LocationSummary* locations =
2182 new (allocator) LocationSummary(invoke,
2183 can_call
2184 ? LocationSummary::kCallOnSlowPath
2185 : LocationSummary::kNoCall,
2186 kIntrinsified);
2187 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2188 locations->SetInAt(1, Location::RequiresRegister());
2189 locations->SetInAt(2, Location::RequiresRegister());
2190 // The expected value must be in EAX/RAX.
2191 locations->SetInAt(3, Location::RegisterLocation(RAX));
2192 locations->SetInAt(4, Location::RequiresRegister());
2193
2194 // RAX is clobbered by CMPXCHG, but as it is also the output there is no need for an extra temporary.
2195 locations->SetOut(Location::RegisterLocation(RAX));
2196
2197 if (type == DataType::Type::kReference) {
2198 // Need two temporaries for MarkGCCard.
2199 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2200 locations->AddTemp(Location::RequiresRegister());
2201 if (codegen->EmitReadBarrier()) {
2202 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2203 DCHECK(kUseBakerReadBarrier);
2204 locations->AddTemp(Location::RequiresRegister());
2205 }
2206 }
2207 }
2208
VisitUnsafeCASInt(HInvoke * invoke)2209 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2210 VisitJdkUnsafeCASInt(invoke);
2211 }
2212
VisitUnsafeCASLong(HInvoke * invoke)2213 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2214 VisitJdkUnsafeCASLong(invoke);
2215 }
2216
VisitUnsafeCASObject(HInvoke * invoke)2217 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2218 VisitJdkUnsafeCASObject(invoke);
2219 }
2220
VisitJdkUnsafeCASInt(HInvoke * invoke)2221 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2222 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2223 VisitJdkUnsafeCompareAndSetInt(invoke);
2224 }
2225
VisitJdkUnsafeCASLong(HInvoke * invoke)2226 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2227 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2228 VisitJdkUnsafeCompareAndSetLong(invoke);
2229 }
2230
VisitJdkUnsafeCASObject(HInvoke * invoke)2231 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2232 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2233 VisitJdkUnsafeCompareAndSetReference(invoke);
2234 }
2235
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2236 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2237 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
2238 }
2239
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2240 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2241 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
2242 }
2243
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2244 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2245 // The only supported read barrier implementation is the Baker-style read barriers.
2246 if (codegen_->EmitNonBakerReadBarrier()) {
2247 return;
2248 }
2249
2250 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
2251 }
2252
2253 // Convert ZF into the Boolean result.
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2254 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2255 __ setcc(kZero, out);
2256 __ movzxb(out, out);
2257 }
2258
2259 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2260 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2261 DataType::Type type,
2262 Address field_addr,
2263 Location value,
2264 bool is_cmpxchg,
2265 bool byte_swap) {
2266 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2267 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2268
2269 if (byte_swap) {
2270 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2271 instr_codegen->Bswap(value, type);
2272 }
2273
2274 switch (type) {
2275 case DataType::Type::kBool:
2276 case DataType::Type::kInt8:
2277 __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2278 break;
2279 case DataType::Type::kInt16:
2280 case DataType::Type::kUint16:
2281 __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2282 break;
2283 case DataType::Type::kInt32:
2284 case DataType::Type::kUint32:
2285 __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2286 break;
2287 case DataType::Type::kInt64:
2288 case DataType::Type::kUint64:
2289 __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2290 break;
2291 default:
2292 LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2293 }
2294 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2295
2296 if (byte_swap) {
2297 // Restore byte order for value.
2298 instr_codegen->Bswap(value, type);
2299 }
2300
2301 CpuRegister rax(RAX);
2302 if (is_cmpxchg) {
2303 if (byte_swap) {
2304 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2305 }
2306 // Sign-extend or zero-extend the result as necessary.
2307 switch (type) {
2308 case DataType::Type::kBool:
2309 __ movzxb(rax, rax);
2310 break;
2311 case DataType::Type::kInt8:
2312 __ movsxb(rax, rax);
2313 break;
2314 case DataType::Type::kInt16:
2315 __ movsxw(rax, rax);
2316 break;
2317 case DataType::Type::kUint16:
2318 __ movzxw(rax, rax);
2319 break;
2320 default:
2321 break; // No need to do anything.
2322 }
2323 } else {
2324 GenZFlagToResult(assembler, rax);
2325 }
2326 }
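
// Illustrative, non-atomic sketch (hypothetical helpers, not used by the generated code) of
// the two result conventions handled above: compare-and-set reports only success (the ZF
// conversion in GenZFlagToResult), while compare-and-exchange returns the prior memory
// value, which LOCK CMPXCHG leaves in RAX whether or not the store happened.
[[maybe_unused]] static bool ReferenceCompareAndSet(int32_t* addr,
                                                    int32_t expected,
                                                    int32_t new_value) {
  if (*addr != expected) {
    return false;
  }
  *addr = new_value;
  return true;
}

[[maybe_unused]] static int32_t ReferenceCompareAndExchange(int32_t* addr,
                                                            int32_t expected,
                                                            int32_t new_value) {
  int32_t old_value = *addr;
  if (old_value == expected) {
    *addr = new_value;
  }
  return old_value;
}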
2327
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2328 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2329 Address field_addr,
2330 CpuRegister temp,
2331 Location value,
2332 Location expected,
2333 Location out,
2334 bool is64bit,
2335 bool is_cmpxchg,
2336 bool byte_swap) {
2337 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2338 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2339
2340 Location rax_loc = Location::RegisterLocation(RAX);
2341 Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2342
2343 DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2344
2345 // Copy `expected` to RAX (required by the CMPXCHG instruction).
2346 codegen->Move(rax_loc, expected);
2347
2348 // Copy value to some other register (ensure it's not RAX).
2349 DCHECK_NE(temp.AsRegister(), RAX);
2350 codegen->Move(temp_loc, value);
2351
2352 if (byte_swap) {
2353 instr_codegen->Bswap(rax_loc, type);
2354 instr_codegen->Bswap(temp_loc, type);
2355 }
2356
2357 if (is64bit) {
2358 __ LockCmpxchgq(field_addr, temp);
2359 } else {
2360 __ LockCmpxchgl(field_addr, temp);
2361 }
2362 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2363 // No need to restore byte order for temporary register.
2364
2365 if (is_cmpxchg) {
2366 if (byte_swap) {
2367 instr_codegen->Bswap(rax_loc, type);
2368 }
2369 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2370 } else {
2371 GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2372 }
2373 }
2374
2375 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64 * codegen,HInvoke * invoke,CpuRegister base,CpuRegister offset,CpuRegister value,CpuRegister temp1,CpuRegister temp2,CpuRegister temp3,bool is_cmpxchg)2376 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2377 HInvoke* invoke,
2378 CpuRegister base,
2379 CpuRegister offset,
2380 CpuRegister value,
2381 CpuRegister temp1,
2382 CpuRegister temp2,
2383 CpuRegister temp3,
2384 bool is_cmpxchg) {
2385 // The only supported read barrier implementation is the Baker-style read barriers.
2386 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2387
2388 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2389
2390 // Mark card for object assuming new value is stored.
2391 bool value_can_be_null = true; // TODO: Worth finding out this information?
2392 codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);
2393
2394 Address field_addr(base, offset, TIMES_1, 0);
2395 if (codegen->EmitBakerReadBarrier()) {
2396 // Need to make sure the reference stored in the field is a to-space
2397 // one before attempting the CAS or the CAS could fail incorrectly.
2398 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2399 invoke,
2400 Location::RegisterLocation(temp3.AsRegister()),
2401 base,
2402 field_addr,
2403 /* needs_null_check= */ false,
2404 /* always_update_field= */ true,
2405 &temp1,
2406 &temp2);
2407 } else {
2408 // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2409 }
2410
2411 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2412 Register value_reg = value.AsRegister();
2413 if (kPoisonHeapReferences) {
2414 if (base_equals_value) {
2415 // If `base` and `value` are the same register location, move `value_reg` to a temporary
2416 // register. This way, poisoning `value_reg` won't invalidate `base`.
2417 value_reg = temp1.AsRegister();
2418 __ movl(CpuRegister(value_reg), base);
2419 }
2420
2421 // Check that the register allocator did not assign the location of expected value (RAX) to
2422 // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2423 // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2424 // not be poisoned at all, as heap poisoning uses address negation.
2425 // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2426 DCHECK_NE(RAX, value_reg);
2427 DCHECK_NE(RAX, base.AsRegister());
2428
2429 __ PoisonHeapReference(CpuRegister(RAX));
2430 __ PoisonHeapReference(CpuRegister(value_reg));
2431 }
2432
2433 __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2434 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2435
2436 if (is_cmpxchg) {
2437 // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2438 __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2439 } else {
2440 GenZFlagToResult(assembler, CpuRegister(RAX));
2441 }
2442
2443 // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2444 if (kPoisonHeapReferences) {
2445 if (base_equals_value) {
2446 // `value_reg` has been moved to a temporary register, no need to unpoison it.
2447 } else {
2448 // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2449 DCHECK_NE(RAX, value_reg);
2450 __ UnpoisonHeapReference(CpuRegister(value_reg));
2451 }
2452 }
2453 }
2454
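// A small illustrative sketch of why the DCHECKs above matter: heap reference poisoning is
// 32-bit negation, so poisoning the same register twice cancels out and would leave the
// reference unpoisoned. The helper name is hypothetical.
static uint32_t PoisonHeapReferenceSketch(uint32_t ref) {
  return 0u - ref;  // Models the negl-based poisoning; applying it twice yields `ref` again.
}
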
2455 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2456 // and always return true.
2457 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2458 if (kIsDebugBuild) {
2459 for (size_t i = 0; i < regs.size(); ++i) {
2460 for (size_t j = 0; j < i; ++j) {
2461 if (regs[i].AsRegister() == regs[j].AsRegister()) {
2462 return false;
2463 }
2464 }
2465 }
2466 }
2467 return true;
2468 }
2469
2470 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2471 // temporary indices that may not correspond to real registers for code paths that do not use them.
2472 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2473 HInvoke* invoke,
2474 DataType::Type type,
2475 CpuRegister base,
2476 CpuRegister offset,
2477 uint32_t temp1_index,
2478 uint32_t temp2_index,
2479 uint32_t temp3_index,
2480 Location new_value,
2481 Location expected,
2482 Location out,
2483 bool is_cmpxchg,
2484 bool byte_swap) {
2485 LocationSummary* locations = invoke->GetLocations();
2486 Address field_address(base, offset, TIMES_1, 0);
2487
2488 if (DataType::IsFloatingPointType(type)) {
2489 bool is64bit = (type == DataType::Type::kFloat64);
2490 CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2491 DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2492
2493 GenCompareAndSetOrExchangeFP(
2494 codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2495 } else {
2496 // Both the expected value for CMPXCHG and the output are in RAX.
2497 DCHECK_EQ(RAX, expected.AsRegister<Register>());
2498 DCHECK_EQ(RAX, out.AsRegister<Register>());
2499
2500 if (type == DataType::Type::kReference) {
2501 CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2502 CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2503 CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2504 CpuRegister temp3 = codegen->EmitReadBarrier()
2505 ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2506 : CpuRegister(kNoRegister);
2507 DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2508
2509 DCHECK(!byte_swap);
2510 GenCompareAndSetOrExchangeRef(
2511 codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2512 } else {
2513 GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2514 }
2515 }
2516 }
2517
2518 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2519 LocationSummary* locations = invoke->GetLocations();
2520 GenCompareAndSetOrExchange(codegen,
2521 invoke,
2522 type,
2523 /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2524 /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2525 /*temp1_index=*/ 0,
2526 /*temp2_index=*/ 1,
2527 /*temp3_index=*/ 2,
2528 /*new_value=*/ locations->InAt(4),
2529 /*expected=*/ locations->InAt(3),
2530 locations->Out(),
2531 /*is_cmpxchg=*/ false,
2532 /*byte_swap=*/ false);
2533 }
2534
2535 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2536 VisitJdkUnsafeCASInt(invoke);
2537 }
2538
2539 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2540 VisitJdkUnsafeCASLong(invoke);
2541 }
2542
2543 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2544 VisitJdkUnsafeCASObject(invoke);
2545 }
2546
2547 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2548 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2549 VisitJdkUnsafeCompareAndSetInt(invoke);
2550 }
2551
2552 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2553 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2554 VisitJdkUnsafeCompareAndSetLong(invoke);
2555 }
2556
2557 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2558 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2559 VisitJdkUnsafeCompareAndSetReference(invoke);
2560 }
2561
2562 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2563 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2564 }
2565
2566 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2567 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2568 }
2569
2570 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2571 // The only supported read barrier implementation is the Baker-style read barriers.
2572 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2573
2574 GenCAS(DataType::Type::kReference, invoke, codegen_);
2575 }
2576
2577 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2578 HInvoke* invoke,
2579 CodeGeneratorX86_64* codegen) {
2580 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2581 LocationSummary* locations =
2582 new (allocator) LocationSummary(invoke,
2583 can_call
2584 ? LocationSummary::kCallOnSlowPath
2585 : LocationSummary::kNoCall,
2586 kIntrinsified);
2587 if (can_call && kUseBakerReadBarrier) {
2588 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2589 }
2590 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2591 locations->SetInAt(1, Location::RequiresRegister());
2592 locations->SetInAt(2, Location::RequiresRegister());
2593 // Use the same register for both the output and the new value or addend
2594 // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
2595 locations->SetInAt(3, Location::RegisterLocation(RAX));
2596 locations->SetOut(Location::RegisterLocation(RAX));
2597 }
2598
2599 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2600 VisitJdkUnsafeGetAndAddInt(invoke);
2601 }
2602
2603 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2604 VisitJdkUnsafeGetAndAddLong(invoke);
2605 }
2606
2607 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2608 VisitJdkUnsafeGetAndSetInt(invoke);
2609 }
2610
2611 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2612 VisitJdkUnsafeGetAndSetLong(invoke);
2613 }
2614
2615 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2616 VisitJdkUnsafeGetAndSetReference(invoke);
2617 }
2618
2619 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2620 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2621 }
2622
2623 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2624 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2625 }
2626
2627 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2628 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2629 }
2630
2631 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2632 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2633 }
2634
2635 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2636 // The only supported read barrier implementation is the Baker-style read barriers.
2637 if (codegen_->EmitNonBakerReadBarrier()) {
2638 return;
2639 }
2640
2641 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2642 invoke->GetLocations()->AddRegisterTemps(3);
2643 }
2644
2645 enum class GetAndUpdateOp {
2646 kSet,
2647 kAdd,
2648 kBitwiseAnd,
2649 kBitwiseOr,
2650 kBitwiseXor
2651 };
2652
2653 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2654 DataType::Type type,
2655 CodeGeneratorX86_64* codegen,
2656 GetAndUpdateOp get_and_update_op) {
2657 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2658 LocationSummary* locations = invoke->GetLocations();
2659
2660 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // Result.
2661 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
2662 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
2663 DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
2664 Address field_address(base, offset, TIMES_1, 0);
2665
2666 if (type == DataType::Type::kInt32) {
2667 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2668 __ LockXaddl(field_address, out);
2669 } else {
2670 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2671 __ xchgl(out, field_address);
2672 }
2673 } else if (type == DataType::Type::kInt64) {
2674 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2675 __ LockXaddq(field_address, out);
2676 } else {
2677 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2678 __ xchgq(out, field_address);
2679 }
2680 } else {
2681 DCHECK_EQ(type, DataType::Type::kReference);
2682 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2683 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2684 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2685 CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
2686
2687 if (codegen->EmitReadBarrier()) {
2688 DCHECK(kUseBakerReadBarrier);
2689 // Ensure that the field contains a to-space reference.
2690 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2691 invoke,
2692 Location::RegisterLocation(temp3.AsRegister()),
2693 base,
2694 field_address,
2695 /*needs_null_check=*/ false,
2696 /*always_update_field=*/ true,
2697 &temp1,
2698 &temp2);
2699 }
2700
2701 // Mark the card for the object, as a new value is about to be stored.
2702 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2703 codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out, new_value_can_be_null);
2704
2705 if (kPoisonHeapReferences) {
2706 // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2707 // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2708 __ movl(temp1, out);
2709 __ PoisonHeapReference(temp1);
2710 __ xchgl(temp1, field_address);
2711 __ UnpoisonHeapReference(temp1);
2712 __ movl(out, temp1);
2713 } else {
2714 __ xchgl(out, field_address);
2715 }
2716 }
2717 }
2718
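// A minimal sketch (not ART code) of the Java-level semantics implemented above with LOCK XADD
// and XCHG, assuming <atomic> and <cstdint> are available. The helper names are hypothetical.
static int32_t GetAndAddInt32Sketch(std::atomic<int32_t>* field, int32_t addend) {
  return field->fetch_add(addend);      // LOCK XADD: the previous value ends up in `out`.
}
static int32_t GetAndSetInt32Sketch(std::atomic<int32_t>* field, int32_t new_value) {
  return field->exchange(new_value);    // XCHG (implicitly locked): returns the previous value.
}
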
2719 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2720 VisitJdkUnsafeGetAndAddInt(invoke);
2721 }
2722
2723 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2724 VisitJdkUnsafeGetAndAddLong(invoke);
2725 }
2726
2727 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2728 VisitJdkUnsafeGetAndSetInt(invoke);
2729 }
2730
2731 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2732 VisitJdkUnsafeGetAndSetLong(invoke);
2733 }
2734
2735 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2736 VisitJdkUnsafeGetAndSetReference(invoke);
2737 }
2738
2739 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2740 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2741 }
2742
2743 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2744 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2745 }
2746
2747 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2748 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2749 }
2750
2751 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2752 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2753 }
2754
2755 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2756 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2757 }
2758
2759 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2760 LocationSummary* locations =
2761 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2762 locations->SetInAt(0, Location::RequiresRegister());
2763 locations->SetOut(Location::SameAsFirstInput());
2764 locations->AddTemp(Location::RequiresRegister());
2765 }
2766
2767 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2768 X86_64Assembler* assembler) {
2769 Immediate imm_shift(shift);
2770 Immediate imm_mask(mask);
2771 __ movl(temp, reg);
2772 __ shrl(reg, imm_shift);
2773 __ andl(temp, imm_mask);
2774 __ andl(reg, imm_mask);
2775 __ shll(temp, imm_shift);
2776 __ orl(reg, temp);
2777 }
2778
2779 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2780 X86_64Assembler* assembler = GetAssembler();
2781 LocationSummary* locations = invoke->GetLocations();
2782
2783 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2784 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2785
2786 /*
2787 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2788 * bit swapping to reverse the bits of a number x. Using bswap saves instructions
2789 * compared to the generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
2790 * x = bswap x
2791 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2792 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2793 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2794 */
2795 __ bswapl(reg);
2796 SwapBits(reg, temp, 1, 0x55555555, assembler);
2797 SwapBits(reg, temp, 2, 0x33333333, assembler);
2798 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2799 }
2800
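// A standalone sketch of the exact sequence generated above, assuming <cstdint> and GCC/Clang
// builtins: one byte swap followed by three rounds of bit swaps within each byte. The helper
// name is hypothetical.
static uint32_t ReverseBits32Sketch(uint32_t x) {
  x = __builtin_bswap32(x);                                   // bswapl
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);    // SwapBits(..., 1, 0x55555555)
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);    // SwapBits(..., 2, 0x33333333)
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);    // SwapBits(..., 4, 0x0f0f0f0f)
  return x;
}
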
2801 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2802 LocationSummary* locations =
2803 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2804 locations->SetInAt(0, Location::RequiresRegister());
2805 locations->SetOut(Location::SameAsFirstInput());
2806 locations->AddTemp(Location::RequiresRegister());
2807 locations->AddTemp(Location::RequiresRegister());
2808 }
2809
2810 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2811 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2812 Immediate imm_shift(shift);
2813 __ movq(temp_mask, Immediate(mask));
2814 __ movq(temp, reg);
2815 __ shrq(reg, imm_shift);
2816 __ andq(temp, temp_mask);
2817 __ andq(reg, temp_mask);
2818 __ shlq(temp, imm_shift);
2819 __ orq(reg, temp);
2820 }
2821
2822 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2823 X86_64Assembler* assembler = GetAssembler();
2824 LocationSummary* locations = invoke->GetLocations();
2825
2826 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2827 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2828 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2829
2830 /*
2831 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2832 * bit swapping to reverse the bits of a long number x. Using bswap saves instructions
2833 * compared to the generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
2834 * x = bswap x
2835 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2836 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2837 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2838 */
2839 __ bswapq(reg);
2840 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2841 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2842 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2843 }
2844
2845 static void CreateBitCountLocations(
2846 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2847 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2848 // Do nothing if there is no popcnt support. This results in generating
2849 // a call for the intrinsic rather than direct code.
2850 return;
2851 }
2852 LocationSummary* locations =
2853 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2854 locations->SetInAt(0, Location::Any());
2855 locations->SetOut(Location::RequiresRegister());
2856 }
2857
2858 static void GenBitCount(X86_64Assembler* assembler,
2859 CodeGeneratorX86_64* codegen,
2860 HInvoke* invoke,
2861 bool is_long) {
2862 LocationSummary* locations = invoke->GetLocations();
2863 Location src = locations->InAt(0);
2864 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2865
2866 if (invoke->InputAt(0)->IsConstant()) {
2867 // Evaluate this at compile time.
2868 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2869 int32_t result = is_long
2870 ? POPCOUNT(static_cast<uint64_t>(value))
2871 : POPCOUNT(static_cast<uint32_t>(value));
2872 codegen->Load32BitValue(out, result);
2873 return;
2874 }
2875
2876 if (src.IsRegister()) {
2877 if (is_long) {
2878 __ popcntq(out, src.AsRegister<CpuRegister>());
2879 } else {
2880 __ popcntl(out, src.AsRegister<CpuRegister>());
2881 }
2882 } else if (is_long) {
2883 DCHECK(src.IsDoubleStackSlot());
2884 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2885 } else {
2886 DCHECK(src.IsStackSlot());
2887 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2888 }
2889 }
2890
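// A brief sketch of the two paths above, assuming GCC/Clang builtins: a constant input is
// folded at compile time (e.g. the bit count of 0xF0F0F0F0 is 16 and is loaded as an
// immediate), any other input becomes a single POPCNT. The helper name is hypothetical.
static int BitCount32Sketch(uint32_t x) {
  return __builtin_popcount(x);
}
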
2891 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2892 CreateBitCountLocations(allocator_, codegen_, invoke);
2893 }
2894
2895 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2896 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2897 }
2898
2899 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2900 CreateBitCountLocations(allocator_, codegen_, invoke);
2901 }
2902
2903 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2904 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2905 }
2906
2907 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
2908 LocationSummary* locations =
2909 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2910 locations->SetInAt(0, Location::Any());
2911 locations->SetOut(Location::RequiresRegister());
2912 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
2913 : Location::RequiresRegister()); // any will do
2914 }
2915
2916 static void GenOneBit(X86_64Assembler* assembler,
2917 CodeGeneratorX86_64* codegen,
2918 HInvoke* invoke,
2919 bool is_high, bool is_long) {
2920 LocationSummary* locations = invoke->GetLocations();
2921 Location src = locations->InAt(0);
2922 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2923
2924 if (invoke->InputAt(0)->IsConstant()) {
2925 // Evaluate this at compile time.
2926 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2927 if (value == 0) {
2928 __ xorl(out, out); // Clears upper bits too.
2929 return;
2930 }
2931 // Nonzero value.
2932 if (is_high) {
2933 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2934 : 31 - CLZ(static_cast<uint32_t>(value));
2935 } else {
2936 value = is_long ? CTZ(static_cast<uint64_t>(value))
2937 : CTZ(static_cast<uint32_t>(value));
2938 }
2939 if (is_long) {
2940 codegen->Load64BitValue(out, 1ULL << value);
2941 } else {
2942 codegen->Load32BitValue(out, 1 << value);
2943 }
2944 return;
2945 }
2946
2947 // Handle the non-constant cases.
2948 if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
2949 src.IsRegister()) {
2950 __ blsi(out, src.AsRegister<CpuRegister>());
2951 } else {
2952 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2953 if (is_high) {
2954 // Use architectural support: basically 1 << bsr.
2955 if (src.IsRegister()) {
2956 if (is_long) {
2957 __ bsrq(tmp, src.AsRegister<CpuRegister>());
2958 } else {
2959 __ bsrl(tmp, src.AsRegister<CpuRegister>());
2960 }
2961 } else if (is_long) {
2962 DCHECK(src.IsDoubleStackSlot());
2963 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2964 } else {
2965 DCHECK(src.IsStackSlot());
2966 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2967 }
2968 // BSR sets ZF if the input was zero.
2969 NearLabel is_zero, done;
2970 __ j(kEqual, &is_zero);
2971 __ movl(out, Immediate(1)); // Clears upper bits too.
2972 if (is_long) {
2973 __ shlq(out, tmp);
2974 } else {
2975 __ shll(out, tmp);
2976 }
2977 __ jmp(&done);
2978 __ Bind(&is_zero);
2979 __ xorl(out, out); // Clears upper bits too.
2980 __ Bind(&done);
2981 } else {
2982 // Copy input into temporary.
2983 if (src.IsRegister()) {
2984 if (is_long) {
2985 __ movq(tmp, src.AsRegister<CpuRegister>());
2986 } else {
2987 __ movl(tmp, src.AsRegister<CpuRegister>());
2988 }
2989 } else if (is_long) {
2990 DCHECK(src.IsDoubleStackSlot());
2991 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2992 } else {
2993 DCHECK(src.IsStackSlot());
2994 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2995 }
2996 // Do the bit twiddling: basically tmp & -tmp;
2997 if (is_long) {
2998 __ movq(out, tmp);
2999 __ negq(tmp);
3000 __ andq(out, tmp);
3001 } else {
3002 __ movl(out, tmp);
3003 __ negl(tmp);
3004 __ andl(out, tmp);
3005 }
3006 }
3007 }
3008 }
3009
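// A minimal sketch of the scalar semantics generated above, assuming <cstdint> and GCC/Clang
// builtins: lowestOneBit is x & -x (what BLSI computes), highestOneBit is 1 << BSR(x) with an
// explicit zero fix-up because BSR leaves its output undefined for a zero input. The helper
// names are hypothetical.
static uint64_t LowestOneBit64Sketch(uint64_t x) {
  return x & (0u - x);
}
static uint64_t HighestOneBit64Sketch(uint64_t x) {
  return (x == 0u) ? 0u : (UINT64_C(1) << (63 - __builtin_clzll(x)));
}
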
3010 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3011 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3012 }
3013
3014 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3015 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
3016 }
3017
3018 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3019 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3020 }
3021
3022 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3023 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
3024 }
3025
3026 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3027 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3028 }
3029
3030 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3031 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
3032 }
3033
3034 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3035 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3036 }
3037
3038 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3039 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
3040 }
3041
3042 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3043 LocationSummary* locations =
3044 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3045 locations->SetInAt(0, Location::Any());
3046 locations->SetOut(Location::RequiresRegister());
3047 }
3048
3049 static void GenLeadingZeros(X86_64Assembler* assembler,
3050 CodeGeneratorX86_64* codegen,
3051 HInvoke* invoke, bool is_long) {
3052 LocationSummary* locations = invoke->GetLocations();
3053 Location src = locations->InAt(0);
3054 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3055
3056 int zero_value_result = is_long ? 64 : 32;
3057 if (invoke->InputAt(0)->IsConstant()) {
3058 // Evaluate this at compile time.
3059 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3060 if (value == 0) {
3061 value = zero_value_result;
3062 } else {
3063 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3064 }
3065 codegen->Load32BitValue(out, value);
3066 return;
3067 }
3068
3069 // Handle the non-constant cases.
3070 if (src.IsRegister()) {
3071 if (is_long) {
3072 __ bsrq(out, src.AsRegister<CpuRegister>());
3073 } else {
3074 __ bsrl(out, src.AsRegister<CpuRegister>());
3075 }
3076 } else if (is_long) {
3077 DCHECK(src.IsDoubleStackSlot());
3078 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3079 } else {
3080 DCHECK(src.IsStackSlot());
3081 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3082 }
3083
3084 // BSR sets ZF if the input was zero, and the output is undefined.
3085 NearLabel is_zero, done;
3086 __ j(kEqual, &is_zero);
3087
3088 // Correct the result from BSR to get the CLZ result.
3089 __ xorl(out, Immediate(zero_value_result - 1));
3090 __ jmp(&done);
3091
3092 // Fix the zero case with the expected result.
3093 __ Bind(&is_zero);
3094 __ movl(out, Immediate(zero_value_result));
3095
3096 __ Bind(&done);
3097 }
3098
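// A short sketch of the BSR-to-CLZ fix-up above, assuming GCC/Clang builtins: for a nonzero
// input, BSR yields the index of the highest set bit, and XOR with 31 (or 63) turns that index
// into the leading-zero count. The helper name is hypothetical.
static int NumberOfLeadingZeros32Sketch(uint32_t x) {
  if (x == 0u) {
    return 32;                               // BSR sets ZF; the is_zero path loads 32 directly.
  }
  int highest_bit = 31 - __builtin_clz(x);   // What BSR computes for a nonzero input.
  return highest_bit ^ 31;                   // Same as 31 - highest_bit for indices in [0, 31].
}
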
3099 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3100 CreateLeadingZeroLocations(allocator_, invoke);
3101 }
3102
3103 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3104 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3105 }
3106
3107 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3108 CreateLeadingZeroLocations(allocator_, invoke);
3109 }
3110
3111 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3112 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3113 }
3114
3115 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3116 LocationSummary* locations =
3117 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3118 locations->SetInAt(0, Location::Any());
3119 locations->SetOut(Location::RequiresRegister());
3120 }
3121
3122 static void GenTrailingZeros(X86_64Assembler* assembler,
3123 CodeGeneratorX86_64* codegen,
3124 HInvoke* invoke, bool is_long) {
3125 LocationSummary* locations = invoke->GetLocations();
3126 Location src = locations->InAt(0);
3127 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3128
3129 int zero_value_result = is_long ? 64 : 32;
3130 if (invoke->InputAt(0)->IsConstant()) {
3131 // Evaluate this at compile time.
3132 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3133 if (value == 0) {
3134 value = zero_value_result;
3135 } else {
3136 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3137 }
3138 codegen->Load32BitValue(out, value);
3139 return;
3140 }
3141
3142 // Handle the non-constant cases.
3143 if (src.IsRegister()) {
3144 if (is_long) {
3145 __ bsfq(out, src.AsRegister<CpuRegister>());
3146 } else {
3147 __ bsfl(out, src.AsRegister<CpuRegister>());
3148 }
3149 } else if (is_long) {
3150 DCHECK(src.IsDoubleStackSlot());
3151 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3152 } else {
3153 DCHECK(src.IsStackSlot());
3154 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3155 }
3156
3157 // BSF sets ZF if the input was zero, and the output is undefined.
3158 NearLabel done;
3159 __ j(kNotEqual, &done);
3160
3161 // Fix the zero case with the expected result.
3162 __ movl(out, Immediate(zero_value_result));
3163
3164 __ Bind(&done);
3165 }
3166
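// A short sketch of the path above, assuming GCC/Clang builtins: BSF already produces the
// trailing-zero count for nonzero inputs, so only the zero case needs a fix-up. The helper
// name is hypothetical.
static int NumberOfTrailingZeros32Sketch(uint32_t x) {
  return (x == 0u) ? 32 : __builtin_ctz(x);
}
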
3167 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3168 CreateTrailingZeroLocations(allocator_, invoke);
3169 }
3170
3171 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3172 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3173 }
3174
3175 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3176 CreateTrailingZeroLocations(allocator_, invoke);
3177 }
3178
3179 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3180 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3181 }
3182
3183 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3184 void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3185 InvokeRuntimeCallingConvention calling_convention; \
3186 IntrinsicVisitor::ComputeValueOfLocations( \
3187 invoke, \
3188 codegen_, \
3189 low, \
3190 (high) - (low) + 1, \
3191 Location::RegisterLocation(RAX), \
3192 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
3193 } \
3194 void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3195 IntrinsicVisitor::ValueOfInfo info = \
3196 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3197 codegen_->GetCompilerOptions(), \
3198 WellKnownClasses::java_lang_##name##_value, \
3199 low, \
3200 (high) - (low) + 1, \
3201 start_index); \
3202 HandleValueOf(invoke, info, type); \
3203 }
3204 BOXED_TYPES(VISIT_INTRINSIC)
3205 #undef VISIT_INTRINSIC
3206
3207 template <typename T>
3208 static void Store(X86_64Assembler* assembler,
3209 DataType::Type primitive_type,
3210 const Address& address,
3211 const T& operand) {
3212 switch (primitive_type) {
3213 case DataType::Type::kInt8:
3214 case DataType::Type::kUint8: {
3215 __ movb(address, operand);
3216 break;
3217 }
3218 case DataType::Type::kInt16:
3219 case DataType::Type::kUint16: {
3220 __ movw(address, operand);
3221 break;
3222 }
3223 case DataType::Type::kInt32: {
3224 __ movl(address, operand);
3225 break;
3226 }
3227 default: {
3228 LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3229 }
3230 }
3231 }
3232
3233 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3234 const IntrinsicVisitor::ValueOfInfo& info,
3235 DataType::Type type) {
3236 LocationSummary* locations = invoke->GetLocations();
3237 X86_64Assembler* assembler = GetAssembler();
3238
3239 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3240 InvokeRuntimeCallingConvention calling_convention;
3241 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3242 auto allocate_instance = [&]() {
3243 codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3244 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3245 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3246 };
3247 if (invoke->InputAt(0)->IsIntConstant()) {
3248 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3249 if (static_cast<uint32_t>(value - info.low) < info.length) {
3250 // Just embed the object in the code.
3251 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3252 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3253 } else {
3254 DCHECK(locations->CanCall());
3255 // Allocate and initialize a new object.
3256 // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3257 // JIT object table.
3258 allocate_instance();
3259 Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3260 }
3261 } else {
3262 DCHECK(locations->CanCall());
3263 CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3264 // Check bounds of our cache.
3265 __ leal(out, Address(in, -info.low));
3266 __ cmpl(out, Immediate(info.length));
3267 NearLabel allocate, done;
3268 __ j(kAboveEqual, &allocate);
3269 // If the value is within the bounds, load the boxed value directly from the array.
3270 DCHECK_NE(out.AsRegister(), argument.AsRegister());
3271 codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3272 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3273 "Check heap reference size.");
3274 __ movl(out, Address(argument, out, TIMES_4, 0));
3275 __ MaybeUnpoisonHeapReference(out);
3276 __ jmp(&done);
3277 __ Bind(&allocate);
3278 // Otherwise allocate and initialize a new object.
3279 allocate_instance();
3280 Store(assembler, type, Address(out, info.value_offset), in);
3281 __ Bind(&done);
3282 }
3283 }
3284
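// A minimal sketch of the unsigned bounds check used above: subtracting `low` and comparing
// unsigned against `length` covers both "below low" and "at or above low + length" with one
// branch, matching the leal/cmpl/j(kAboveEqual) sequence. The helper name is hypothetical.
static bool InValueOfCacheSketch(int32_t value, int32_t low, uint32_t length) {
  return (static_cast<uint32_t>(value) - static_cast<uint32_t>(low)) < length;
}
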
3285 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3286 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3287 }
3288
3289 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3290 X86_64Assembler* assembler = GetAssembler();
3291 LocationSummary* locations = invoke->GetLocations();
3292
3293 Location obj = locations->InAt(0);
3294 Location out = locations->Out();
3295
3296 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3297 codegen_->AddSlowPath(slow_path);
3298
3299 if (codegen_->EmitReadBarrier()) {
3300 // Check self->GetWeakRefAccessEnabled().
3301 ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3302 __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3303 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3304 __ j(kNotEqual, slow_path->GetEntryLabel());
3305 }
3306
3307 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3308 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3309
3310 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3311 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3312 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3313 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3314 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3315 __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3316 Immediate(0));
3317 __ j(kNotEqual, slow_path->GetEntryLabel());
3318
3319 // Load the value from the field.
3320 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3321 if (codegen_->EmitBakerReadBarrier()) {
3322 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3323 out,
3324 obj.AsRegister<CpuRegister>(),
3325 referent_offset,
3326 /*needs_null_check=*/ true);
3327 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3328 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3329 } else {
3330 __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3331 codegen_->MaybeRecordImplicitNullCheck(invoke);
3332 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3333 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3334 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3335 }
3336 __ Bind(slow_path->GetExitLabel());
3337 }
3338
3339 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3340 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3341 }
3342
3343 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3344 X86_64Assembler* assembler = GetAssembler();
3345 LocationSummary* locations = invoke->GetLocations();
3346
3347 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3348 CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3349 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3350
3351 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3352 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3353
3354 __ movl(out, Address(obj, referent_offset));
3355 codegen_->MaybeRecordImplicitNullCheck(invoke);
3356 __ MaybeUnpoisonHeapReference(out);
3357 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3358 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3359
3360 __ cmpl(out, other);
3361
3362 if (codegen_->EmitReadBarrier()) {
3363 DCHECK(kUseBakerReadBarrier);
3364
3365 NearLabel calculate_result;
3366 __ j(kEqual, &calculate_result); // ZF set if taken.
3367
3368 // Check if the loaded reference is null in a way that leaves ZF clear for null.
3369 __ cmpl(out, Immediate(1));
3370 __ j(kBelow, &calculate_result); // ZF clear if taken.
3371
3372 // For correct memory visibility, we need a barrier before loading the lock word,
3373 // but the barrier already emitted for the volatile load above is sufficient.
3374
3375 // Load the lockword and check if it is a forwarding address.
3376 static_assert(LockWord::kStateShift == 30u);
3377 static_assert(LockWord::kStateForwardingAddress == 3u);
3378 __ movl(out, Address(out, monitor_offset));
3379 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3380 __ j(kBelow, &calculate_result); // ZF clear if taken.
3381
3382 // Extract the forwarding address and compare with `other`.
3383 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3384 __ cmpl(out, other);
3385
3386 __ Bind(&calculate_result);
3387 }
3388
3389 // Convert ZF into the Boolean result.
3390 __ setcc(kEqual, out);
3391 __ movzxb(out, out);
3392 }
3393
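// A tiny sketch of the lock-word test above: the two top bits hold the state, and the
// forwarding-address state (3) corresponds exactly to the unsigned range >= 0xc0000000, which
// is why a single unsigned compare suffices. The helper name is hypothetical.
static bool IsForwardingAddressStateSketch(uint32_t lock_word) {
  return (lock_word >> 30) == 3u;
}
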
3394 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3395 LocationSummary* locations =
3396 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3397 locations->SetOut(Location::RequiresRegister());
3398 }
3399
3400 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3401 X86_64Assembler* assembler = GetAssembler();
3402 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3403 Address address = Address::Absolute
3404 (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3405 NearLabel done;
3406 __ gs()->movl(out, address);
3407 __ testl(out, out);
3408 __ j(kEqual, &done);
3409 __ gs()->movl(address, Immediate(0));
3410 codegen_->MemoryFence();
3411 __ Bind(&done);
3412 }
3413
3414 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3415 LocationSummary* locations =
3416 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3417 locations->SetInAt(0, Location::Any());
3418 }
3419
3420 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3421
3422 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3423 LocationSummary* locations =
3424 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3425 locations->SetInAt(0, Location::RegisterLocation(RAX));
3426 locations->SetInAt(1, Location::RequiresRegister());
3427 locations->SetOut(Location::SameAsFirstInput());
3428 // The DIV instruction uses RDX:RAX (EDX:EAX for the 32-bit form) as the dividend.
3429 locations->AddTemp(Location::RegisterLocation(RDX));
3430 }
3431
3432 static void GenerateDivideUnsigned(HInvoke* invoke,
3433 CodeGeneratorX86_64* codegen,
3434 DataType::Type data_type) {
3435 LocationSummary* locations = invoke->GetLocations();
3436 Location out = locations->Out();
3437 Location first = locations->InAt(0);
3438 Location second = locations->InAt(1);
3439 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3440 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3441
3442 DCHECK_EQ(RAX, first.AsRegister<Register>());
3443 DCHECK_EQ(RAX, out.AsRegister<Register>());
3444 DCHECK_EQ(RDX, rdx.AsRegister());
3445
3446 // Check whether the divisor is zero and bail out to the slow path if so.
3447 auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3448 codegen->AddSlowPath(slow_path);
3449
3450 X86_64Assembler* assembler = codegen->GetAssembler();
3451 if (data_type == DataType::Type::kInt32) {
3452 __ testl(second_reg, second_reg);
3453 __ j(kEqual, slow_path->GetEntryLabel());
3454 __ xorl(rdx, rdx);
3455 __ divl(second_reg);
3456 } else {
3457 DCHECK(data_type == DataType::Type::kInt64);
3458 __ testq(second_reg, second_reg);
3459 __ j(kEqual, slow_path->GetEntryLabel());
3460 __ xorq(rdx, rdx);
3461 __ divq(second_reg);
3462 }
3463 __ Bind(slow_path->GetExitLabel());
3464 }
3465
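// A minimal sketch of the fast path above: zeroing RDX makes the RDX:RAX dividend equal to the
// zero-extended value, so DIV performs a plain unsigned division; a zero divisor is diverted
// to the slow path, which throws ArithmeticException. The helper name is hypothetical.
static uint32_t DivideUnsigned32Sketch(uint32_t dividend, uint32_t divisor) {
  return dividend / divisor;  // Callers mirror the generated code and never pass divisor == 0.
}
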
3466 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3467 CreateDivideUnsignedLocations(invoke, allocator_);
3468 }
3469
3470 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3471 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3472 }
3473
3474 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3475 CreateDivideUnsignedLocations(invoke, allocator_);
3476 }
3477
3478 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3479 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3480 }
3481
3482 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3483 LocationSummary* locations =
3484 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3485 locations->SetInAt(0, Location::RegisterLocation(RAX));
3486 locations->SetInAt(1, Location::RequiresRegister());
3487 locations->SetOut(Location::RegisterLocation(RDX));
3488 locations->AddTemp(Location::RegisterLocation(RAX));
3489 }
3490
3491 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3492 X86_64Assembler* assembler = GetAssembler();
3493 LocationSummary* locations = invoke->GetLocations();
3494
3495 CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3496
3497 DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3498 DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3499
3500 __ imulq(y);
3501 }
3502
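// A short sketch of Math.multiplyHigh as generated above, assuming compiler support for
// __int128: the one-operand IMUL leaves the 128-bit product in RDX:RAX and the intrinsic
// returns the RDX half. The helper name is hypothetical.
static int64_t MultiplyHighSketch(int64_t x, int64_t y) {
  return static_cast<int64_t>((static_cast<__int128>(x) * y) >> 64);
}
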
3503 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3504 public:
3505 explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3506 : IntrinsicSlowPathX86_64(invoke) {
3507 }
3508
3509 void SetVolatile(bool is_volatile) {
3510 is_volatile_ = is_volatile;
3511 }
3512
3513 void SetAtomic(bool is_atomic) {
3514 is_atomic_ = is_atomic;
3515 }
3516
3517 void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3518 need_any_store_barrier_ = need_any_store_barrier;
3519 }
3520
3521 void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3522 need_any_any_barrier_ = need_any_any_barrier;
3523 }
3524
3525 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3526 get_and_update_op_ = get_and_update_op;
3527 }
3528
3529 Label* GetByteArrayViewCheckLabel() {
3530 return &byte_array_view_check_label_;
3531 }
3532
3533 Label* GetNativeByteOrderLabel() {
3534 return &native_byte_order_label_;
3535 }
3536
3537 void EmitNativeCode(CodeGenerator* codegen) override {
3538 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3539 EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3540 }
3541 IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3542 }
3543
3544 private:
3545 HInvoke* GetInvoke() const {
3546 return GetInstruction()->AsInvoke();
3547 }
3548
3549 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3550 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3551 }
3552
3553 void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3554
3555 Label byte_array_view_check_label_;
3556 Label native_byte_order_label_;
3557
3558 // Arguments forwarded to specific methods.
3559 bool is_volatile_;
3560 bool is_atomic_;
3561 bool need_any_store_barrier_;
3562 bool need_any_any_barrier_;
3563 GetAndUpdateOp get_and_update_op_;
3564 };
3565
3566 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3567 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3568 X86_64Assembler* assembler = codegen->GetAssembler();
3569 LocationSummary* locations = invoke->GetLocations();
3570 DCHECK(locations->InAt(0).Equals(locations->Out()));
3571 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3572 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3573 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
3574 if (invoke->GetType() == DataType::Type::kFloat32) {
3575 __ vfmadd213ss(left, right, accumulator);
3576 } else {
3577 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3578 __ vfmadd213sd(left, right, accumulator);
3579 }
3580 }
3581
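// A brief sketch of the operand order above, assuming <cmath>: the "213" form computes
// left = right * left + accumulator, i.e. Math.fma(a, b, c) = a * b + c with a in `left`,
// b in `right` and c in `accumulator`, rounded once. The helper name is hypothetical.
static double FmaSketch(double a, double b, double c) {
  return std::fma(a, b, c);
}
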
3582 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3583 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3584 GenerateMathFma(invoke, codegen_);
3585 }
3586
3587 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3588 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3589 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3590 }
3591 }
3592
3593 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3594 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3595 GenerateMathFma(invoke, codegen_);
3596 }
3597
3598 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3599 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3600 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3601 }
3602 }
3603
3604 // Generate subtype check without read barriers.
3605 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3606 VarHandleSlowPathX86_64* slow_path,
3607 CpuRegister object,
3608 CpuRegister temp,
3609 Address type_address,
3610 bool object_can_be_null = true) {
3611 X86_64Assembler* assembler = codegen->GetAssembler();
3612
3613 const MemberOffset class_offset = mirror::Object::ClassOffset();
3614 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3615
3616 NearLabel check_type_compatibility, type_matched;
3617
3618 // If the object is null, there is no need to check the type.
3619 if (object_can_be_null) {
3620 __ testl(object, object);
3621 __ j(kZero, &type_matched);
3622 }
3623
3624 // Do not unpoison for in-memory comparison.
3625 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3626 __ movl(temp, Address(object, class_offset));
3627 __ Bind(&check_type_compatibility);
3628 __ cmpl(temp, type_address);
3629 __ j(kEqual, &type_matched);
3630 // Load the super class.
3631 __ MaybeUnpoisonHeapReference(temp);
3632 __ movl(temp, Address(temp, super_class_offset));
3633 // If the super class is null, we reached the root of the hierarchy without a match.
3634 // We let the slow path handle uncovered cases (e.g. interfaces).
3635 __ testl(temp, temp);
3636 __ j(kEqual, slow_path->GetEntryLabel());
3637 __ jmp(&check_type_compatibility);
3638 __ Bind(&type_matched);
3639 }
3640
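// A hypothetical, simplified model (not mirror::Class) of the loop above: walk the superclass
// chain and defer to the slow path when the chain ends without a match.
struct ClassModelSketch {
  const ClassModelSketch* super_class;
};
static bool IsSubClassOfSketch(const ClassModelSketch* klass, const ClassModelSketch* target) {
  for (; klass != nullptr; klass = klass->super_class) {
    if (klass == target) {
      return true;   // Corresponds to branching to `type_matched`.
    }
  }
  return false;      // The generated code branches to the slow path here (e.g. interfaces).
}
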
3641 // Check access mode and the primitive type from VarHandle.varType.
3642 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3643 // check without read barrier, so it can have false negatives which we handle in the slow path.
3644 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3645 CodeGeneratorX86_64* codegen,
3646 VarHandleSlowPathX86_64* slow_path,
3647 DataType::Type type) {
3648 X86_64Assembler* assembler = codegen->GetAssembler();
3649
3650 LocationSummary* locations = invoke->GetLocations();
3651 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3652 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3653
3654 mirror::VarHandle::AccessMode access_mode =
3655 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3656 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3657
3658 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3659 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3660 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3661
3662 // Check that the operation is permitted.
3663 __ testl(Address(varhandle, access_mode_bit_mask_offset),
3664 Immediate(1u << static_cast<uint32_t>(access_mode)));
3665 __ j(kZero, slow_path->GetEntryLabel());
3666
3667 // For primitive types, the varType reference is loaded without a read barrier because it is
3668 // only used to load a constant field through that reference. For reference types, we
3669 // deliberately avoid the read barrier, letting the slow path handle the false negatives.
3670 __ movl(temp, Address(varhandle, var_type_offset));
3671 __ MaybeUnpoisonHeapReference(temp);
3672
3673 // Check the varType.primitiveType field against the type we're trying to use.
3674 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3675 __ j(kNotEqual, slow_path->GetEntryLabel());
3676
3677 if (type == DataType::Type::kReference) {
3678 // Check reference arguments against the varType.
3679 // False negatives due to varType being an interface or array type
3680 // or due to the missing read barrier are handled by the slow path.
3681 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3682 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3683 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3684 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3685 HInstruction* arg = invoke->InputAt(arg_index);
3686 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3687 if (!arg->IsNullConstant()) {
3688 CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3689 Address type_addr(varhandle, var_type_offset);
3690 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3691 }
3692 }
3693 }
3694 }
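
// Editor's note: the access-mode test above checks a single bit of VarHandle.accessModesBitMask.
// A minimal stand-alone sketch of the same predicate, with illustrative names that are not ART
// APIs (`bitmask` stands for the accessModesBitMask field, `mode` for the access-mode ordinal):
//
//   #include <cstdint>
//
//   constexpr bool IsAccessModeSupported(uint32_t bitmask, uint32_t mode) {
//     return (bitmask & (1u << mode)) != 0u;  // Mirrors the TESTL + JZ emitted above.
//   }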
3695
3696 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3697 CodeGeneratorX86_64* codegen,
3698 VarHandleSlowPathX86_64* slow_path) {
3699 X86_64Assembler* assembler = codegen->GetAssembler();
3700
3701 LocationSummary* locations = invoke->GetLocations();
3702 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3703
3704 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3705
3706 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3707 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3708 __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3709 __ j(kNotEqual, slow_path->GetEntryLabel());
3710 }
3711
3712 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3713 CodeGeneratorX86_64* codegen,
3714 VarHandleSlowPathX86_64* slow_path) {
3715 VarHandleOptimizations optimizations(invoke);
3716 X86_64Assembler* assembler = codegen->GetAssembler();
3717
3718 LocationSummary* locations = invoke->GetLocations();
3719 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3720 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3721 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3722
3723 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3724 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3725
3726 // Null-check the object.
3727 if (!optimizations.GetSkipObjectNullCheck()) {
3728 __ testl(object, object);
3729 __ j(kZero, slow_path->GetEntryLabel());
3730 }
3731
3732 if (!optimizations.GetUseKnownImageVarHandle()) {
3733 // Check that the VarHandle references an instance field by checking that
3734 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3735 // type compatibility check with the source object's type, which will fail for null.
3736 __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3737 __ j(kNotEqual, slow_path->GetEntryLabel());
3738
3739 // Check that the object has the correct type.
3740 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3741 GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3742 slow_path,
3743 object,
3744 temp,
3745 Address(varhandle, coordinate_type0_offset),
3746 /*object_can_be_null=*/ false);
3747 }
3748 }
3749
3750 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3751 CodeGeneratorX86_64* codegen,
3752 VarHandleSlowPathX86_64* slow_path) {
3753 VarHandleOptimizations optimizations(invoke);
3754 X86_64Assembler* assembler = codegen->GetAssembler();
3755 LocationSummary* locations = invoke->GetLocations();
3756
3757 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3758 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3759 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3760 DataType::Type value_type =
3761 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3762 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3763
3764 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3765 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3766 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3767 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3768 const MemberOffset class_offset = mirror::Object::ClassOffset();
3769 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3770
3771 // Null-check the object.
3772 if (!optimizations.GetSkipObjectNullCheck()) {
3773 __ testl(object, object);
3774 __ j(kZero, slow_path->GetEntryLabel());
3775 }
3776
3777 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3778
3779 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3780 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3781 // coordinateType0 shall not be null, but we do not explicitly verify that.
3782 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3783 __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3784 __ j(kEqual, slow_path->GetEntryLabel());
3785
3786 // Check object class against componentType0.
3787 //
3788 // This is an exact check and we defer other cases to the runtime. This includes
3789 // conversion to array of superclass references, which is valid but subsequently
3790 // requires all update operations to check that the value can indeed be stored.
3791 // We do not want to perform such extra checks in the intrinsified code.
3792 //
3793 // We do this check without read barrier, so there can be false negatives which we
3794 // defer to the slow path. There shall be no false negatives for array classes in the
3795 // boot image (including Object[] and primitive arrays) because they are non-movable.
3796 __ movl(temp, Address(object, class_offset.Int32Value()));
3797 __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3798 __ j(kNotEqual, slow_path->GetEntryLabel());
3799
3800 // Check that the coordinateType0 is an array type. We do not need a read barrier
3801 // for loading constant reference fields (or chains of them) for comparison with null,
3802 // nor for finally loading a constant primitive field (primitive type) below.
3803 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3804 __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3805 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3806 __ testl(temp, temp);
3807 __ j(kZero, slow_path->GetEntryLabel());
3808
3809 // Check that the array component type matches the primitive type.
3810 Label* slow_path_label;
3811 if (primitive_type == Primitive::kPrimNot) {
3812 slow_path_label = slow_path->GetEntryLabel();
3813 } else {
3814 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3815 // we shall check for a byte array view in the slow path.
3816 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3817 // so we cannot emit that if we're JITting without boot image.
3818 bool boot_image_available =
3819 codegen->GetCompilerOptions().IsBootImage() ||
3820 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3821 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3822 slow_path_label =
3823 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3824 }
3825 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3826 __ j(kNotEqual, slow_path_label);
3827
3828 // Check for array index out of bounds.
3829 __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3830 __ j(kAboveEqual, slow_path->GetEntryLabel());
3831 }
3832
3833 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3834 CodeGeneratorX86_64* codegen,
3835 VarHandleSlowPathX86_64* slow_path) {
3836 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3837 if (expected_coordinates_count == 0u) {
3838 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3839 } else if (expected_coordinates_count == 1u) {
3840 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3841 } else {
3842 DCHECK_EQ(expected_coordinates_count, 2u);
3843 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3844 }
3845 }
3846
3847 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
3848 CodeGeneratorX86_64* codegen,
3849 DataType::Type type) {
3850 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3851 VarHandleOptimizations optimizations(invoke);
3852 if (optimizations.GetUseKnownImageVarHandle()) {
3853 DCHECK_NE(expected_coordinates_count, 2u);
3854 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3855 return nullptr;
3856 }
3857 }
3858
3859 VarHandleSlowPathX86_64* slow_path =
3860 new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
3861 codegen->AddSlowPath(slow_path);
3862
3863 if (!optimizations.GetUseKnownImageVarHandle()) {
3864 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3865 }
3866 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3867
3868 return slow_path;
3869 }
3870
3871 struct VarHandleTarget {
3872 Register object; // The object holding the value to operate on.
3873 Register offset; // The offset of the value to operate on.
3874 };
3875
3876 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3877 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3878 LocationSummary* locations = invoke->GetLocations();
3879
3880 VarHandleTarget target;
3881 // The temporary allocated for loading the offset.
3882 target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
3883 // The reference to the object that holds the value to operate on.
3884 target.object = (expected_coordinates_count == 0u)
3885 ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
3886 : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
3887 return target;
3888 }
3889
3890 static void GenerateVarHandleTarget(HInvoke* invoke,
3891 const VarHandleTarget& target,
3892 CodeGeneratorX86_64* codegen) {
3893 LocationSummary* locations = invoke->GetLocations();
3894 X86_64Assembler* assembler = codegen->GetAssembler();
3895 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3896
3897 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3898
3899 if (expected_coordinates_count <= 1u) {
3900 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
3901 ScopedObjectAccess soa(Thread::Current());
3902 ArtField* target_field = GetBootImageVarHandleField(invoke);
3903 if (expected_coordinates_count == 0u) {
3904 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3905 __ movl(CpuRegister(target.object),
3906 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
3907 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3908 codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
3909 } else {
3910 codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
3911 declaring_class->GetDexTypeIndex());
3912 }
3913 }
3914 __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
3915 } else {
3916 // For static fields, we need to fill the `target.object` with the declaring class,
3917 // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
3918 // we do not need the declaring class, so we can forget the `ArtField*` when
3919 // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
3920 CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
3921
3922 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3923 const MemberOffset offset_offset = ArtField::OffsetOffset();
3924
3925 // Load the ArtField*, the offset and, if needed, declaring class.
3926 __ movq(field, Address(varhandle, art_field_offset));
3927 __ movl(CpuRegister(target.offset), Address(field, offset_offset));
3928 if (expected_coordinates_count == 0u) {
3929 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
3930 instr_codegen->GenerateGcRootFieldLoad(invoke,
3931 Location::RegisterLocation(target.object),
3932 Address(field, ArtField::DeclaringClassOffset()),
3933 /*fixup_label=*/nullptr,
3934 codegen->GetCompilerReadBarrierOption());
3935 }
3936 }
3937 } else {
3938 DCHECK_EQ(expected_coordinates_count, 2u);
3939
3940 DataType::Type value_type =
3941 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3942 ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
3943 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3944 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3945
3946 // The effect of LEA is `target.offset = index * scale + data_offset`.
3947 __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
3948 }
3949 }
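
// Editor's note: after target resolution, every access below addresses the data uniformly as
// `*(object + offset)`: for fields, `offset` is the ArtField offset (with `object` being the
// declaring class for statics); for arrays and views it is data_offset + index * element_size,
// as computed by the LEAL above. A small sketch of the array case (illustrative, not an ART API):
//
//   #include <cstddef>
//
//   constexpr size_t ArrayElementOffset(size_t data_offset, size_t index, size_t element_size) {
//     return data_offset + index * element_size;
//   }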
3950
3951 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3952 // The only supported read barrier implementation is the Baker-style read barriers.
3953 if (codegen->EmitNonBakerReadBarrier()) {
3954 return false;
3955 }
3956
3957 VarHandleOptimizations optimizations(invoke);
3958 if (optimizations.GetDoNotIntrinsify()) {
3959 return false;
3960 }
3961
3962 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3963 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
3964 return true;
3965 }
3966
3967 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
3968 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3969 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3970 LocationSummary* locations = new (allocator) LocationSummary(
3971 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3972
3973 locations->SetInAt(0, Location::RequiresRegister());
3974 // Require coordinates in registers. These are the object holding the value
3975 // to operate on (except for static fields) and index (for arrays and views).
3976 for (size_t i = 0; i != expected_coordinates_count; ++i) {
3977 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3978 }
3979
3980 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3981 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3982 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3983 HInstruction* arg = invoke->InputAt(arg_index);
3984 if (DataType::IsFloatingPointType(arg->GetType())) {
3985 locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
3986 } else {
3987 locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
3988 }
3989 }
3990
3991 // Add a temporary for offset.
3992 locations->AddTemp(Location::RequiresRegister());
3993
3994 if (expected_coordinates_count == 0u) {
3995 // Add a temporary to hold the declaring class.
3996 locations->AddTemp(Location::RequiresRegister());
3997 }
3998
3999 return locations;
4000 }
4001
4002 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4003 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4004 return;
4005 }
4006
4007 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4008 if (DataType::IsFloatingPointType(invoke->GetType())) {
4009 locations->SetOut(Location::RequiresFpuRegister());
4010 } else {
4011 locations->SetOut(Location::RequiresRegister());
4012 }
4013 }
4014
4015 static void GenerateVarHandleGet(HInvoke* invoke,
4016 CodeGeneratorX86_64* codegen,
4017 bool byte_swap = false) {
4018 DataType::Type type = invoke->GetType();
4019 DCHECK_NE(type, DataType::Type::kVoid);
4020
4021 LocationSummary* locations = invoke->GetLocations();
4022 X86_64Assembler* assembler = codegen->GetAssembler();
4023
4024 VarHandleTarget target = GetVarHandleTarget(invoke);
4025 VarHandleSlowPathX86_64* slow_path = nullptr;
4026 if (!byte_swap) {
4027 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4028 GenerateVarHandleTarget(invoke, target, codegen);
4029 if (slow_path != nullptr) {
4030 __ Bind(slow_path->GetNativeByteOrderLabel());
4031 }
4032 }
4033
4034 // Load the value from the field.
4035 Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4036 Location out = locations->Out();
4037
4038 if (type == DataType::Type::kReference) {
4039 if (codegen->EmitReadBarrier()) {
4040 DCHECK(kUseBakerReadBarrier);
4041 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4042 invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4043 } else {
4044 __ movl(out.AsRegister<CpuRegister>(), src);
4045 __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4046 }
4047 DCHECK(!byte_swap);
4048 } else {
4049 codegen->LoadFromMemoryNoReference(type, out, src);
4050 if (byte_swap) {
4051 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4052 codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4053 }
4054 }
4055
4056 if (slow_path != nullptr) {
4057 DCHECK(!byte_swap);
4058 __ Bind(slow_path->GetExitLabel());
4059 }
4060 }
4061
4062 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4063 CreateVarHandleGetLocations(invoke, codegen_);
4064 }
4065
4066 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4067 GenerateVarHandleGet(invoke, codegen_);
4068 }
4069
4070 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4071 CreateVarHandleGetLocations(invoke, codegen_);
4072 }
4073
4074 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4075 // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
4076 GenerateVarHandleGet(invoke, codegen_);
4077 }
4078
4079 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4080 CreateVarHandleGetLocations(invoke, codegen_);
4081 }
4082
4083 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4084 // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4085 GenerateVarHandleGet(invoke, codegen_);
4086 }
4087
4088 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4089 CreateVarHandleGetLocations(invoke, codegen_);
4090 }
4091
4092 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4093 // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4094 GenerateVarHandleGet(invoke, codegen_);
4095 }
4096
4097 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4098 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4099 return;
4100 }
4101
4102 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4103
4104 // An extra temporary is used for the card in MarkGCCard and to move 64-bit constants to memory.
4105 locations->AddTemp(Location::RequiresRegister());
4106 }
4107
4108 static void GenerateVarHandleSet(HInvoke* invoke,
4109 CodeGeneratorX86_64* codegen,
4110 bool is_volatile,
4111 bool is_atomic,
4112 bool byte_swap = false) {
4113 X86_64Assembler* assembler = codegen->GetAssembler();
4114
4115 LocationSummary* locations = invoke->GetLocations();
4116 const uint32_t last_temp_index = locations->GetTempCount() - 1;
4117
4118 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4119 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4120
4121 VarHandleTarget target = GetVarHandleTarget(invoke);
4122 VarHandleSlowPathX86_64* slow_path = nullptr;
4123 if (!byte_swap) {
4124 slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4125 GenerateVarHandleTarget(invoke, target, codegen);
4126 if (slow_path != nullptr) {
4127 slow_path->SetVolatile(is_volatile);
4128 slow_path->SetAtomic(is_atomic);
4129 __ Bind(slow_path->GetNativeByteOrderLabel());
4130 }
4131 }
4132
4133 switch (invoke->GetIntrinsic()) {
4134 case Intrinsics::kVarHandleSetRelease:
4135 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4136 break;
4137 case Intrinsics::kVarHandleSetVolatile:
4138 // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4139 break;
4140 default:
4141 // Other intrinsics don't need a barrier.
4142 break;
4143 }
4144
4145 Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4146
4147 // Store the value to the field.
4148 codegen->GetInstructionCodegen()->HandleFieldSet(
4149 invoke,
4150 value_index,
4151 last_temp_index,
4152 value_type,
4153 dst,
4154 CpuRegister(target.object),
4155 is_volatile,
4156 is_atomic,
4157 /*value_can_be_null=*/true,
4158 byte_swap,
4159 // Value can be null, and this write barrier is not being relied on for other sets.
4160 value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4161 WriteBarrierKind::kDontEmit);
4162
4163 // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4164
4165 if (slow_path != nullptr) {
4166 DCHECK(!byte_swap);
4167 __ Bind(slow_path->GetExitLabel());
4168 }
4169 }
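
// Editor's note: a rough C++11 analogue of the store flavors handled above, assuming x86-64's TSO
// memory model where release stores need no trailing fence but sequentially consistent stores do
// (the kAnyAny barrier emitted by HandleFieldSet for setVolatile). Illustrative only:
//
//   #include <atomic>
//
//   void StoreFlavors(std::atomic<int>& field, int v) {
//     field.store(v, std::memory_order_relaxed);  // set / setOpaque
//     field.store(v, std::memory_order_release);  // setRelease
//     field.store(v, std::memory_order_seq_cst);  // setVolatile (XCHG or MOV + full fence)
//   }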
4170
4171 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4172 CreateVarHandleSetLocations(invoke, codegen_);
4173 }
4174
4175 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4176 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4177 }
4178
4179 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4180 CreateVarHandleSetLocations(invoke, codegen_);
4181 }
4182
4183 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4184 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4185 }
4186
4187 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4188 CreateVarHandleSetLocations(invoke, codegen_);
4189 }
4190
4191 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4192 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4193 }
4194
4195 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4196 CreateVarHandleSetLocations(invoke, codegen_);
4197 }
4198
4199 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4200 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4201 }
4202
4203 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4204 CodeGeneratorX86_64* codegen) {
4205 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4206 return;
4207 }
4208
4209 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4210 uint32_t expected_value_index = number_of_arguments - 2;
4211 uint32_t new_value_index = number_of_arguments - 1;
4212 DataType::Type return_type = invoke->GetType();
4213 DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4214 DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4215
4216 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4217
4218 if (DataType::IsFloatingPointType(return_type)) {
4219 locations->SetOut(Location::RequiresFpuRegister());
4220 } else {
4221 // Take advantage of the fact that CMPXCHG writes result to RAX.
4222 locations->SetOut(Location::RegisterLocation(RAX));
4223 }
4224
4225 if (DataType::IsFloatingPointType(expected_type)) {
4226 // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4227 locations->AddTemp(Location::RegisterLocation(RAX));
4228 // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4229 locations->AddTemp(Location::RequiresRegister());
4230 } else {
4231 // Ensure that expected value is in RAX, as required by CMPXCHG.
4232 locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4233 locations->SetInAt(new_value_index, Location::RequiresRegister());
4234 if (expected_type == DataType::Type::kReference) {
4235 // Need two temporaries for MarkGCCard.
4236 locations->AddTemp(Location::RequiresRegister());
4237 locations->AddTemp(Location::RequiresRegister());
4238 if (codegen->EmitReadBarrier()) {
4239 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4240 DCHECK(kUseBakerReadBarrier);
4241 locations->AddTemp(Location::RequiresRegister());
4242 }
4243 }
4244 // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4245 DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4246 }
4247 }
4248
4249 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4250 CodeGeneratorX86_64* codegen,
4251 bool is_cmpxchg,
4252 bool byte_swap = false) {
4253 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4254
4255 X86_64Assembler* assembler = codegen->GetAssembler();
4256 LocationSummary* locations = invoke->GetLocations();
4257
4258 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4259 uint32_t expected_value_index = number_of_arguments - 2;
4260 uint32_t new_value_index = number_of_arguments - 1;
4261 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4262
4263 VarHandleSlowPathX86_64* slow_path = nullptr;
4264 VarHandleTarget target = GetVarHandleTarget(invoke);
4265 if (!byte_swap) {
4266 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4267 GenerateVarHandleTarget(invoke, target, codegen);
4268 if (slow_path != nullptr) {
4269 __ Bind(slow_path->GetNativeByteOrderLabel());
4270 }
4271 }
4272
4273 uint32_t temp_count = locations->GetTempCount();
4274 GenCompareAndSetOrExchange(codegen,
4275 invoke,
4276 type,
4277 CpuRegister(target.object),
4278 CpuRegister(target.offset),
4279 /*temp1_index=*/ temp_count - 1,
4280 /*temp2_index=*/ temp_count - 2,
4281 /*temp3_index=*/ temp_count - 3,
4282 locations->InAt(new_value_index),
4283 locations->InAt(expected_value_index),
4284 locations->Out(),
4285 is_cmpxchg,
4286 byte_swap);
4287
4288 // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4289 // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4290
4291 if (slow_path != nullptr) {
4292 DCHECK(!byte_swap);
4293 __ Bind(slow_path->GetExitLabel());
4294 }
4295 }
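
// Editor's note: compareAndSet and compareAndExchange both boil down to LOCK CMPXCHG; they differ
// only in whether the success flag (ZF) or the witnessed old value (RAX) becomes the result. A
// minimal C++ analogue with illustrative names (not ART APIs):
//
//   #include <atomic>
//
//   bool CompareAndSet(std::atomic<int>& field, int expected, int desired) {
//     return field.compare_exchange_strong(expected, desired);  // Result taken from ZF.
//   }
//
//   int CompareAndExchange(std::atomic<int>& field, int expected, int desired) {
//     field.compare_exchange_strong(expected, desired);  // On failure `expected` is updated...
//     return expected;                                   // ...so this is the witnessed value.
//   }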
4296
4297 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4298 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4299 }
4300
4301 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4302 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4303 }
4304
4305 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4306 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4307 }
4308
4309 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4310 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4311 }
4312
4313 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4314 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4315 }
4316
4317 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4318 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4319 }
4320
4321 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4322 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4323 }
4324
4325 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4326 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4327 }
4328
4329 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4330 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4331 }
4332
4333 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4334 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4335 }
4336
4337 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4338 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4339 }
4340
4341 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4342 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4343 }
4344
4345 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4346 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4347 }
4348
4349 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4350 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4351 }
4352
4353 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4354 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4355 }
4356
4357 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4358 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4359 }
4360
4361 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4362 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4363 return;
4364 }
4365
4366 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4367 uint32_t new_value_index = number_of_arguments - 1;
4368 DataType::Type type = invoke->GetType();
4369 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4370
4371 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4372
4373 if (DataType::IsFloatingPointType(type)) {
4374 locations->SetOut(Location::RequiresFpuRegister());
4375 // A temporary is needed to load the new floating-point value into a register for XCHG.
4376 locations->AddTemp(Location::RequiresRegister());
4377 } else {
4378 // Use the same register for both the new value and output to take advantage of XCHG.
4379 // It doesn't have to be RAX, but we need to pick one register so that input and output match.
4380 locations->SetOut(Location::RegisterLocation(RAX));
4381 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4382 if (type == DataType::Type::kReference) {
4383 // Need two temporaries for MarkGCCard.
4384 locations->AddTemp(Location::RequiresRegister());
4385 locations->AddTemp(Location::RequiresRegister());
4386 if (codegen->EmitReadBarrier()) {
4387 // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4388 DCHECK(kUseBakerReadBarrier);
4389 locations->AddTemp(Location::RequiresRegister());
4390 }
4391 }
4392 }
4393 }
4394
4395 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4396 CodeGeneratorX86_64* codegen,
4397 Location value,
4398 DataType::Type type,
4399 Address field_addr,
4400 CpuRegister ref,
4401 bool byte_swap) {
4402 X86_64Assembler* assembler = codegen->GetAssembler();
4403 LocationSummary* locations = invoke->GetLocations();
4404 Location out = locations->Out();
4405 uint32_t temp_count = locations->GetTempCount();
4406
4407 if (DataType::IsFloatingPointType(type)) {
4408 // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4409 // exchange it with the field, and move the old value into the output FP register.
4410 Location temp = locations->GetTemp(temp_count - 1);
4411 codegen->Move(temp, value);
4412 bool is64bit = (type == DataType::Type::kFloat64);
4413 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4414 if (byte_swap) {
4415 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4416 }
4417 if (is64bit) {
4418 __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4419 } else {
4420 __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4421 }
4422 if (byte_swap) {
4423 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4424 }
4425 __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4426 } else if (type == DataType::Type::kReference) {
4427 // `getAndSet` for references: load reference and atomically exchange it with the field.
4428 // Output register is the same as the one holding new value, so no need to move the result.
4429 DCHECK(!byte_swap);
4430
4431 CpuRegister temp1 = locations->GetTemp(temp_count - 1).AsRegister<CpuRegister>();
4432 CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
4433 CpuRegister valreg = value.AsRegister<CpuRegister>();
4434
4435 if (codegen->EmitBakerReadBarrier()) {
4436 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4437 invoke,
4438 locations->GetTemp(temp_count - 3),
4439 ref,
4440 field_addr,
4441 /*needs_null_check=*/ false,
4442 /*always_update_field=*/ true,
4443 &temp1,
4444 &temp2);
4445 }
4446 codegen->MarkGCCard(temp1, temp2, ref);
4447
4448 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4449 if (kPoisonHeapReferences) {
4450 // Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is
4451 // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4452 __ movl(temp1, valreg);
4453 __ PoisonHeapReference(temp1);
4454 __ xchgl(temp1, field_addr);
4455 __ UnpoisonHeapReference(temp1);
4456 __ movl(valreg, temp1);
4457 } else {
4458 __ xchgl(valreg, field_addr);
4459 }
4460 } else {
4461 // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4462 // register is the same as the one holding new value. Do sign extend / zero extend as needed.
4463 if (byte_swap) {
4464 codegen->GetInstructionCodegen()->Bswap(value, type);
4465 }
4466 CpuRegister valreg = value.AsRegister<CpuRegister>();
4467 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4468 switch (type) {
4469 case DataType::Type::kBool:
4470 case DataType::Type::kUint8:
4471 __ xchgb(valreg, field_addr);
4472 __ movzxb(valreg, valreg);
4473 break;
4474 case DataType::Type::kInt8:
4475 __ xchgb(valreg, field_addr);
4476 __ movsxb(valreg, valreg);
4477 break;
4478 case DataType::Type::kUint16:
4479 __ xchgw(valreg, field_addr);
4480 __ movzxw(valreg, valreg);
4481 break;
4482 case DataType::Type::kInt16:
4483 __ xchgw(valreg, field_addr);
4484 __ movsxw(valreg, valreg);
4485 break;
4486 case DataType::Type::kInt32:
4487 case DataType::Type::kUint32:
4488 __ xchgl(valreg, field_addr);
4489 break;
4490 case DataType::Type::kInt64:
4491 case DataType::Type::kUint64:
4492 __ xchgq(valreg, field_addr);
4493 break;
4494 default:
4495 LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4496 UNREACHABLE();
4497 }
4498 if (byte_swap) {
4499 codegen->GetInstructionCodegen()->Bswap(value, type);
4500 }
4501 }
4502 }
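
// Editor's note: for integral types the code above leans on XCHG with a memory operand being
// implicitly locked, so no LOCK prefix or retry loop is required. A one-line C++ analogue
// (illustrative only):
//
//   #include <atomic>
//
//   int GetAndSet(std::atomic<int>& field, int new_value) {
//     return field.exchange(new_value);  // Compiles to a single XCHG on x86-64.
//   }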
4503
4504 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4505 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4506 return;
4507 }
4508
4509 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4510 uint32_t new_value_index = number_of_arguments - 1;
4511 DataType::Type type = invoke->GetType();
4512 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4513
4514 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4515
4516 DCHECK_NE(DataType::Type::kReference, type);
4517 DCHECK(!DataType::IsFloatingPointType(type));
4518
4519 // A temporary to compute the bitwise operation on the old and the new values.
4520 locations->AddTemp(Location::RequiresRegister());
4521 // We need the value to be either in a register or a 32-bit constant (as there are no arithmetic
4522 // instructions that accept a 64-bit immediate on x86_64).
4523 locations->SetInAt(new_value_index, DataType::Is64BitType(type)
4524 ? Location::RequiresRegister()
4525 : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4526 // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4527 locations->SetOut(Location::RegisterLocation(RAX));
4528 }
4529
4530 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4531 CodeGeneratorX86_64* codegen,
4532 Location value,
4533 DataType::Type type,
4534 Address field_addr,
4535 GetAndUpdateOp get_and_update_op,
4536 bool byte_swap) {
4537 X86_64Assembler* assembler = codegen->GetAssembler();
4538 LocationSummary* locations = invoke->GetLocations();
4539 Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
4540 Location rax_loc = locations->Out();
4541 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4542 CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
4543 DCHECK_EQ(rax.AsRegister(), RAX);
4544 bool is64Bit = DataType::Is64BitType(type);
4545
4546 NearLabel retry;
4547 __ Bind(&retry);
4548
4549 // Load field value into RAX and copy it into a temporary register for the operation.
4550 codegen->LoadFromMemoryNoReference(type, Location::RegisterLocation(RAX), field_addr);
4551 codegen->Move(temp_loc, rax_loc);
4552 if (byte_swap) {
4553 // Byte swap the temporary, since we need to perform operation in native endianness.
4554 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4555 }
4556
4557 DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4558 int32_t const_value = value.IsConstant()
4559 ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4560 : 0;
4561
4562 // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4563 switch (get_and_update_op) {
4564 case GetAndUpdateOp::kAdd:
4565 DCHECK(byte_swap); // The non-byte-swapping path should use a faster XADD instruction.
4566 if (is64Bit) {
4567 __ addq(temp, value.AsRegister<CpuRegister>());
4568 } else if (value.IsConstant()) {
4569 __ addl(temp, Immediate(const_value));
4570 } else {
4571 __ addl(temp, value.AsRegister<CpuRegister>());
4572 }
4573 break;
4574 case GetAndUpdateOp::kBitwiseAnd:
4575 if (is64Bit) {
4576 __ andq(temp, value.AsRegister<CpuRegister>());
4577 } else if (value.IsConstant()) {
4578 __ andl(temp, Immediate(const_value));
4579 } else {
4580 __ andl(temp, value.AsRegister<CpuRegister>());
4581 }
4582 break;
4583 case GetAndUpdateOp::kBitwiseOr:
4584 if (is64Bit) {
4585 __ orq(temp, value.AsRegister<CpuRegister>());
4586 } else if (value.IsConstant()) {
4587 __ orl(temp, Immediate(const_value));
4588 } else {
4589 __ orl(temp, value.AsRegister<CpuRegister>());
4590 }
4591 break;
4592 case GetAndUpdateOp::kBitwiseXor:
4593 if (is64Bit) {
4594 __ xorq(temp, value.AsRegister<CpuRegister>());
4595 } else if (value.IsConstant()) {
4596 __ xorl(temp, Immediate(const_value));
4597 } else {
4598 __ xorl(temp, value.AsRegister<CpuRegister>());
4599 }
4600 break;
4601 default:
4602 LOG(FATAL) << "unexpected operation";
4603 UNREACHABLE();
4604 }
4605
4606 if (byte_swap) {
4607 // RAX still contains the original value, but we need to byte swap the temporary back.
4608 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4609 }
4610
4611 switch (type) {
4612 case DataType::Type::kBool:
4613 case DataType::Type::kUint8:
4614 case DataType::Type::kInt8:
4615 __ LockCmpxchgb(field_addr, temp);
4616 break;
4617 case DataType::Type::kUint16:
4618 case DataType::Type::kInt16:
4619 __ LockCmpxchgw(field_addr, temp);
4620 break;
4621 case DataType::Type::kInt32:
4622 case DataType::Type::kUint32:
4623 __ LockCmpxchgl(field_addr, temp);
4624 break;
4625 case DataType::Type::kInt64:
4626 case DataType::Type::kUint64:
4627 __ LockCmpxchgq(field_addr, temp);
4628 break;
4629 default:
4630 LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
4631 UNREACHABLE();
4632 }
4633
4634 __ j(kNotZero, &retry);
4635
4636 // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4637 // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4638 if (byte_swap) {
4639 codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4640 }
4641 }
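
// Editor's note: the retry loop above is the standard CMPXCHG read-modify-write pattern: load the
// old value, compute `old OP value` in a temporary, and LOCK CMPXCHG it back, retrying if another
// thread changed the field in between. A compact C++ analogue for the bitwise-and case
// (illustrative only):
//
//   #include <atomic>
//
//   int GetAndBitwiseAnd(std::atomic<int>& field, int mask) {
//     int old_value = field.load();
//     while (!field.compare_exchange_weak(old_value, old_value & mask)) {
//       // A failed CAS refreshed `old_value`; just retry with it.
//     }
//     return old_value;
//   }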
4642
4643 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4644 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4645 return;
4646 }
4647
4648 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4649 uint32_t new_value_index = number_of_arguments - 1;
4650 DataType::Type type = invoke->GetType();
4651 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4652
4653 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4654
4655 if (DataType::IsFloatingPointType(type)) {
4656 locations->SetOut(Location::RequiresFpuRegister());
4657 // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
4658 locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
4659 // CMPXCHG clobbers RAX.
4660 locations->AddTemp(Location::RegisterLocation(RAX));
4661 // An FP temporary to load the old value from the field and perform FP addition.
4662 locations->AddTemp(Location::RequiresFpuRegister());
4663 // A temporary to hold the new value for CMPXCHG.
4664 locations->AddTemp(Location::RequiresRegister());
4665 } else {
4666 DCHECK_NE(type, DataType::Type::kReference);
4667 // Use the same register for both the new value and output to take advantage of XADD.
4668 // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
4669 // back to GenerateVarHandleGetAndOp, which expects the output in RAX.
4670 locations->SetOut(Location::RegisterLocation(RAX));
4671 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4672 if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
4673 // For byte array views with non-native endianness we need extra BSWAP operations, so we
4674 // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
4675 // case we need two temporary registers: one to hold the value instead of RAX (which may get
4676 // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
4677 // cannot distinguish this case from arrays or native-endian byte array views.
4678 locations->AddTemp(Location::RequiresRegister());
4679 locations->AddTemp(Location::RequiresRegister());
4680 }
4681 }
4682 }
4683
4684 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
4685 CodeGeneratorX86_64* codegen,
4686 Location value,
4687 DataType::Type type,
4688 Address field_addr,
4689 bool byte_swap) {
4690 X86_64Assembler* assembler = codegen->GetAssembler();
4691 LocationSummary* locations = invoke->GetLocations();
4692 Location out = locations->Out();
4693 uint32_t temp_count = locations->GetTempCount();
4694
4695 if (DataType::IsFloatingPointType(type)) {
4696 if (byte_swap) {
4697 // This code should never be executed: it is the case of a byte array view (since it requires
4698 // a byte swap), and varhandles for byte array views support numeric atomic update access mode
4699 // only for int and long, but not for floating-point types (see javadoc comments for
4700 // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). But the ART varhandle implementation
4701 // for byte array views treats floating-point types as numeric types in
4702 // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
4703 // fails access mode check at runtime prior to reaching this point. Illegal instruction UD2
4704 // ensures that if control flow gets here by mistake, we will notice.
4705 __ ud2();
4706 }
4707
4708 // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
4709 // into RAX for CMPXCHG, add the new FP value to the old one, move it to a non-FP temporary for
4710 // CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
4711 bool is64bit = (type == DataType::Type::kFloat64);
4712 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4713 XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
4714 Location rax_loc = Location::RegisterLocation(RAX);
4715 Location temp_loc = locations->GetTemp(temp_count - 1);
4716 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4717
4718 NearLabel retry;
4719 __ Bind(&retry);
4720
4721 // Read the value from memory into an FP register and copy it into RAX.
4722 if (is64bit) {
4723 __ movsd(fptemp, field_addr);
4724 } else {
4725 __ movss(fptemp, field_addr);
4726 }
4727 __ movd(CpuRegister(RAX), fptemp, is64bit);
4728 // If necessary, byte swap RAX and update the value in FP register to also be byte-swapped.
4729 if (byte_swap) {
4730 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4731 __ movd(fptemp, CpuRegister(RAX), is64bit);
4732 }
4733 // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
4734 if (is64bit) {
4735 __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
4736 } else {
4737 __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
4738 }
4739 __ movd(temp, fptemp, is64bit);
4740 // If necessary, byte swap RAX before CMPXCHG and the temporary before copying to FP register.
4741 if (byte_swap) {
4742 codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
4743 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4744 }
4745 if (is64bit) {
4746 __ LockCmpxchgq(field_addr, temp);
4747 } else {
4748 __ LockCmpxchgl(field_addr, temp);
4749 }
4750
4751 __ j(kNotZero, &retry);
4752
4753 // The old value is in RAX, byte swap if necessary.
4754 if (byte_swap) {
4755 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4756 }
4757 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
4758 } else {
4759 if (byte_swap) {
4760 // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
4761 // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
4762 // implementation that is also used for bitwise operations.
4763 // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
4764 DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
4765 Location temp = locations->GetTemp(temp_count - 2);
4766 codegen->Move(temp, value);
4767 GenerateVarHandleGetAndOp(
4768 invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
4769 } else {
4770 // `getAndAdd` for integral types: atomically exchange the new value with the field and add
4771 // the old value to the field. Output register is the same as the one holding new value. Do
4772 // sign extend / zero extend as needed.
4773 CpuRegister valreg = value.AsRegister<CpuRegister>();
4774 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4775 switch (type) {
4776 case DataType::Type::kBool:
4777 case DataType::Type::kUint8:
4778 __ LockXaddb(field_addr, valreg);
4779 __ movzxb(valreg, valreg);
4780 break;
4781 case DataType::Type::kInt8:
4782 __ LockXaddb(field_addr, valreg);
4783 __ movsxb(valreg, valreg);
4784 break;
4785 case DataType::Type::kUint16:
4786 __ LockXaddw(field_addr, valreg);
4787 __ movzxw(valreg, valreg);
4788 break;
4789 case DataType::Type::kInt16:
4790 __ LockXaddw(field_addr, valreg);
4791 __ movsxw(valreg, valreg);
4792 break;
4793 case DataType::Type::kInt32:
4794 case DataType::Type::kUint32:
4795 __ LockXaddl(field_addr, valreg);
4796 break;
4797 case DataType::Type::kInt64:
4798 case DataType::Type::kUint64:
4799 __ LockXaddq(field_addr, valreg);
4800 break;
4801 default:
4802 LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
4803 UNREACHABLE();
4804 }
4805 }
4806 }
4807 }
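
// Editor's note: the integral path above uses LOCK XADD, which adds and returns the old value in
// one instruction; the floating-point and byte-swapping paths cannot, so they fall back to a
// CMPXCHG loop. C++ analogues of both shapes (illustrative only; atomic<float> CAS is C++11,
// fetch_add on integers maps to LOCK XADD):
//
//   #include <atomic>
//
//   int GetAndAddInt(std::atomic<int>& field, int delta) {
//     return field.fetch_add(delta);
//   }
//
//   float GetAndAddFloat(std::atomic<float>& field, float delta) {
//     float old_value = field.load();
//     while (!field.compare_exchange_weak(old_value, old_value + delta)) {
//     }
//     return old_value;
//   }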
4808
4809 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4810 CodeGeneratorX86_64* codegen,
4811 GetAndUpdateOp get_and_update_op,
4812 bool need_any_store_barrier,
4813 bool need_any_any_barrier,
4814 bool byte_swap = false) {
4815 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4816
4817 X86_64Assembler* assembler = codegen->GetAssembler();
4818 LocationSummary* locations = invoke->GetLocations();
4819
4820 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4821 Location value = locations->InAt(number_of_arguments - 1);
4822 DataType::Type type = invoke->GetType();
4823
4824 VarHandleSlowPathX86_64* slow_path = nullptr;
4825 VarHandleTarget target = GetVarHandleTarget(invoke);
4826 if (!byte_swap) {
4827 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4828 GenerateVarHandleTarget(invoke, target, codegen);
4829 if (slow_path != nullptr) {
4830 slow_path->SetGetAndUpdateOp(get_and_update_op);
4831 slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
4832 slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
4833 __ Bind(slow_path->GetNativeByteOrderLabel());
4834 }
4835 }
4836
4837 CpuRegister ref(target.object);
4838 Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
4839
4840 if (need_any_store_barrier) {
4841 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4842 }
4843
4844 switch (get_and_update_op) {
4845 case GetAndUpdateOp::kSet:
4846 GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
4847 break;
4848 case GetAndUpdateOp::kAdd:
4849 GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
4850 break;
4851 case GetAndUpdateOp::kBitwiseAnd:
4852 case GetAndUpdateOp::kBitwiseOr:
4853 case GetAndUpdateOp::kBitwiseXor:
4854 GenerateVarHandleGetAndOp(
4855 invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
4856 break;
4857 }
4858
4859 if (need_any_any_barrier) {
4860 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4861 }
4862
4863 if (slow_path != nullptr) {
4864 DCHECK(!byte_swap);
4865 __ Bind(slow_path->GetExitLabel());
4866 }
4867 }
4868
4869 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4870 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4871 }
4872
4873 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4874 // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4875 GenerateVarHandleGetAndUpdate(invoke,
4876 codegen_,
4877 GetAndUpdateOp::kSet,
4878 /*need_any_store_barrier=*/ true,
4879 /*need_any_any_barrier=*/ true);
4880 }
4881
4882 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4883 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4884 }
4885
4886 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4887 // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4888 GenerateVarHandleGetAndUpdate(invoke,
4889 codegen_,
4890 GetAndUpdateOp::kSet,
4891 /*need_any_store_barrier=*/ false,
4892 /*need_any_any_barrier=*/ false);
4893 }
4894
4895 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4896 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4897 }
4898
4899 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4900 // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4901 GenerateVarHandleGetAndUpdate(invoke,
4902 codegen_,
4903 GetAndUpdateOp::kSet,
4904 /*need_any_store_barrier=*/ true,
4905 /*need_any_any_barrier=*/ false);
4906 }
4907
4908 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4909 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4910 }
4911
4912 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4913 // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4914 GenerateVarHandleGetAndUpdate(invoke,
4915 codegen_,
4916 GetAndUpdateOp::kAdd,
4917 /*need_any_store_barrier=*/ true,
4918 /*need_any_any_barrier=*/ true);
4919 }
4920
4921 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4922 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4923 }
4924
4925 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4926 // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4927 GenerateVarHandleGetAndUpdate(invoke,
4928 codegen_,
4929 GetAndUpdateOp::kAdd,
4930 /*need_any_store_barrier=*/ false,
4931 /*need_any_any_barrier=*/ false);
4932 }
4933
4934 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4935 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4936 }
4937
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4938 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4939 // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4940 GenerateVarHandleGetAndUpdate(invoke,
4941 codegen_,
4942 GetAndUpdateOp::kAdd,
4943 /*need_any_store_barrier=*/ true,
4944 /*need_any_any_barrier=*/ false);
4945 }
4946
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4947 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4948 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4949 }
4950
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4951 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4952 // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4953 GenerateVarHandleGetAndUpdate(invoke,
4954 codegen_,
4955 GetAndUpdateOp::kBitwiseAnd,
4956 /*need_any_store_barrier=*/ true,
4957 /*need_any_any_barrier=*/ true);
4958 }
4959
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4960 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4961 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4962 }
4963
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4964 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4965 // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4966 GenerateVarHandleGetAndUpdate(invoke,
4967 codegen_,
4968 GetAndUpdateOp::kBitwiseAnd,
4969 /*need_any_store_barrier=*/ false,
4970 /*need_any_any_barrier=*/ false);
4971 }
4972
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4973 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4974 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4975 }
4976
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4977 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4978 // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4979 GenerateVarHandleGetAndUpdate(invoke,
4980 codegen_,
4981 GetAndUpdateOp::kBitwiseAnd,
4982 /*need_any_store_barrier=*/ true,
4983 /*need_any_any_barrier=*/ false);
4984 }
4985
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4986 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4987 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4988 }
4989
VisitVarHandleGetAndBitwiseOr(HInvoke * invoke)4990 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4991 // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4992 GenerateVarHandleGetAndUpdate(invoke,
4993 codegen_,
4994 GetAndUpdateOp::kBitwiseOr,
4995 /*need_any_store_barrier=*/ true,
4996 /*need_any_any_barrier=*/ true);
4997 }
4998
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)4999 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5000 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5001 }
5002
VisitVarHandleGetAndBitwiseOrAcquire(HInvoke * invoke)5003 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5004 // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5005 GenerateVarHandleGetAndUpdate(invoke,
5006 codegen_,
5007 GetAndUpdateOp::kBitwiseOr,
5008 /*need_any_store_barrier=*/ false,
5009 /*need_any_any_barrier=*/ false);
5010 }
5011
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5012 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5013 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5014 }
5015
VisitVarHandleGetAndBitwiseOrRelease(HInvoke * invoke)5016 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5017 // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5018 GenerateVarHandleGetAndUpdate(invoke,
5019 codegen_,
5020 GetAndUpdateOp::kBitwiseOr,
5021 /*need_any_store_barrier=*/ true,
5022 /*need_any_any_barrier=*/ false);
5023 }
5024
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5025 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5026 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5027 }
5028
VisitVarHandleGetAndBitwiseXor(HInvoke * invoke)5029 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5030 // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5031 GenerateVarHandleGetAndUpdate(invoke,
5032 codegen_,
5033 GetAndUpdateOp::kBitwiseXor,
5034 /*need_any_store_barrier=*/ true,
5035 /*need_any_any_barrier=*/ true);
5036 }
5037
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5038 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5039 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5040 }
5041
VisitVarHandleGetAndBitwiseXorAcquire(HInvoke * invoke)5042 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5043 // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5044 GenerateVarHandleGetAndUpdate(invoke,
5045 codegen_,
5046 GetAndUpdateOp::kBitwiseXor,
5047 /*need_any_store_barrier=*/ false,
5048 /*need_any_any_barrier=*/ false);
5049 }
5050
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5051 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5052 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5053 }
5054
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)5055 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5056 // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5057 GenerateVarHandleGetAndUpdate(invoke,
5058 codegen_,
5059 GetAndUpdateOp::kBitwiseXor,
5060 /*need_any_store_barrier=*/ true,
5061 /*need_any_any_barrier=*/ false);
5062 }
5063
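// Slow-path code for a VarHandle access where the coordinate is an array whose component type
// does not match the value type. It checks that the VarHandle is actually a
// ByteArrayViewVarHandle, validates bounds and alignment, and then redoes the access with
// `byte_swap=true` unless the view uses the native byte order, in which case it returns to the
// main path.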
void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
  DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
  X86_64Assembler* assembler = codegen->GetAssembler();

  HInvoke* invoke = GetInvoke();
  LocationSummary* locations = invoke->GetLocations();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);

  CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  VarHandleTarget target = GetVarHandleTarget(invoke);

  __ Bind(GetByteArrayViewCheckLabel());

  // The main path checked that the coordinateType0 is an array class that matches
  // the class of the actual coordinate argument but it does not match the value type.
  // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
  codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
  assembler->MaybePoisonHeapReference(temp);
  __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
  __ j(kNotEqual, GetEntryLabel());

  // Check for array index out of bounds.
  __ movl(temp, Address(object, array_length_offset.Int32Value()));
  // SUB sets flags in the same way as CMP.
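  // The comparison is unsigned, so a negative `index` (seen as a large unsigned value) also
  // takes the slow path.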
  __ subl(temp, index);
  __ j(kBelowEqual, GetEntryLabel());
  // The difference between index and array length must be enough for the `value_type` size.
  __ cmpl(temp, Immediate(size));
  __ j(kBelow, GetEntryLabel());

  // Construct the target.
  __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
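  // `target.offset` now holds the byte offset of the accessed element within the array object,
  // i.e. `data_offset + index`.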

  // Alignment check. For unaligned access, go to the runtime.
  DCHECK(IsPowerOfTwo(size));
  __ testl(CpuRegister(target.offset), Immediate(size - 1u));
  __ j(kNotZero, GetEntryLabel());

  // Byte order check. For native byte order return to the main path.
  if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
      IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
    // There is no reason to differentiate between native byte order and byte-swap
    // for setting a zero bit pattern. Just return to the main path.
    __ jmp(GetNativeByteOrderLabel());
    return;
  }
  __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
  __ j(kNotEqual, GetNativeByteOrderLabel());

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(invoke,
                                    codegen,
                                    get_and_update_op_,
                                    need_any_store_barrier_,
                                    need_any_any_barrier_,
                                    /*byte_swap=*/ true);
      break;
  }

  __ jmp(GetExitLabel());
}

#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art