/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsic_objects.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
#include "well_known_classes.h"

namespace art HIDDEN {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

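// Shared intrinsic slow path: when an inline check fails, it re-dispatches the call to the
// original, non-intrinsified implementation of the method using the dex calling convention.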
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

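// The `__` shorthand lets the code generators below read like assembly listings:
// `__ movl(...)` expands to `assembler->movl(...)`.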
#define __ assembler->

static void GenArrayAddress(X86_64Assembler* assembler,
                            CpuRegister dest,
                            CpuRegister base,
                            Location pos,
                            DataType::Type type,
                            uint32_t data_offset) {
  // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
  } else {
    const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
    __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitBakerReadBarrier());
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    Location length = locations->InAt(4);

    const DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
    GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);

    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
}

static void GenIsInfinite(LocationSummary* locations,
                          bool is64bit,
                          CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = codegen->GetAssembler();

  XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister output = locations->Out().AsRegister<CpuRegister>();

  NearLabel done1, done2;

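  // COMISS/COMISD sets ZF for both "equal" and "unordered" (NaN) results but sets PF only for
  // "unordered", so the parity checks below filter out NaN inputs before reporting an infinity.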
  if (is64bit) {
    double kPositiveInfinity = std::numeric_limits<double>::infinity();
    double kNegativeInfinity = -1 * kPositiveInfinity;

    __ xorq(output, output);
    __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
    __ j(kNotEqual, &done1);
    __ j(kParityEven, &done2);
    __ movq(output, Immediate(1));
    __ jmp(&done2);
    __ Bind(&done1);
    __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
    __ j(kNotEqual, &done2);
    __ j(kParityEven, &done2);
    __ movq(output, Immediate(1));
    __ Bind(&done2);
  } else {
    float kPositiveInfinity = std::numeric_limits<float>::infinity();
    float kNegativeInfinity = -1 * kPositiveInfinity;

    __ xorl(output, output);
    __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
    __ j(kNotEqual, &done1);
    __ j(kParityEven, &done2);
    __ movl(output, Immediate(1));
    __ jmp(&done2);
    __ Bind(&done1);
    __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
    __ j(kNotEqual, &done2);
    __ j(kParityEven, &done2);
    __ movl(output, Immediate(1));
    __ Bind(&done2);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, codegen_);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

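// Emits a ROUNDSD with the given SSE4.1 rounding-mode immediate:
// 0 = round to nearest even (rint), 1 = round toward -inf (floor), 2 = round toward +inf (ceil).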
static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit= */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit= */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }
  LocationSummary* locations =
      new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
      (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVS{B,W,L}, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

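// Emits `cmpl lhs, rhs` followed by a signed jump-if-less to `label`, where `rhs` may be
// given either as a register or as an int constant location.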
template <typename LhsType>
static void EmitCmplJLess(X86_64Assembler* assembler,
                          LhsType lhs,
                          Location rhs,
                          Label* label) {
  static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
  if (rhs.IsConstant()) {
    int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
    __ cmpl(lhs, Immediate(rhs_constant));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
  __ j(kLess, label);
}

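// Emits the range check for one side of an arraycopy: verifies that `pos` is non-negative
// (unless that is already known) and that `length(array) - pos >= length`, branching to the
// slow path otherwise.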
static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
                                         CpuRegister array,
                                         Location pos,
                                         Location length,
                                         SlowPathCode* slow_path,
                                         CpuRegister temp,
                                         bool length_is_array_length,
                                         bool position_sign_checked) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_array_length) {
        // Check that length(array) >= length.
        EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
      }
    } else {
      // Calculate length(array) - pos.
      // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
      // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
      __ movl(temp, Address(array, length_offset));
      __ subl(temp, Immediate(pos_const));

      // Check that (length(array) - pos) >= length.
      EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
    }
  } else if (length_is_array_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    if (!position_sign_checked) {
      __ testl(pos_reg, pos_reg);
      __ j(kLess, slow_path->GetEntryLabel());
    }

    // Calculate length(array) - pos.
    // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
    // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
    __ movl(temp, Address(array, length_offset));
    __ subl(temp, pos_reg);

    // Check that (length(array) - pos) >= length.
    EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
  }
}

static void SystemArrayCopyPrimitive(HInvoke* invoke,
                                     X86_64Assembler* assembler,
                                     CodeGeneratorX86_64* codegen,
                                     DataType::Type type) {
  LocationSummary* locations = invoke->GetLocations();
  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSB/W/L.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               src,
                               src_pos,
                               length,
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               dest,
                               dest_pos,
                               length,
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // The element size of `type` determines the data offset and the address scaling below.
  const size_t data_size = DataType::Size(type);
  const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();

  GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
  GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);

  // Do the move.
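  // REP MOVSB/W/L copies RCX (count) elements from [RSI] (src_base) to [RDI] (dest_base),
  // which is why those temporaries were pinned to fixed registers in the locations builder.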
  switch (type) {
    case DataType::Type::kInt8:
       __ rep_movsb();
       break;
    case DataType::Type::kUint16:
       __ rep_movsw();
       break;
    case DataType::Type::kInt32:
       __ rep_movsl();
       break;
    default:
       LOG(FATAL) << "Unexpected data type for intrinsic";
  }
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (codegen_->EmitNonBakerReadBarrier()) {
    return;
  }

  constexpr int32_t kLengthThreshold = -1;  // No cut-off - handle large arrays in intrinsic code.
  constexpr size_t kInitialNumTemps = 0u;  // We shall allocate temps explicitly.
  LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
      invoke, kLengthThreshold, kInitialNumTemps);
  if (locations != nullptr) {
    // Add temporaries.  We will use REP MOVSL, so we need fixed registers.
    DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
    locations->AddTemp(Location::RegisterLocation(RSI));
    locations->AddTemp(Location::RegisterLocation(RDI));
    locations->AddTemp(Location::RegisterLocation(RCX));
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);

  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Location temp1_loc = locations->GetTemp(0);
  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
  Location temp2_loc = locations->GetTemp(1);
  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
  Location temp3_loc = locations->GetTemp(2);
  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do forward copying.
  // We do not need to do this check if the source and destination positions are the same.
  if (!optimizations.GetSourcePositionIsDestinationPosition()) {
    if (src_pos.IsConstant()) {
      int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      if (dest_pos.IsConstant()) {
        int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
        if (optimizations.GetDestinationIsSource()) {
          // Checked when building locations.
          DCHECK_GE(src_pos_constant, dest_pos_constant);
        } else if (src_pos_constant < dest_pos_constant) {
          __ cmpl(src, dest);
          __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        }
      } else {
        if (!optimizations.GetDestinationIsSource()) {
          __ cmpl(src, dest);
          __ j(kNotEqual, &conditions_on_positions_validated);
        }
        __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
        __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
      EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckSystemArrayCopyPosition(assembler,
                               src,
                               src_pos,
                               length,
                               intrinsic_slow_path,
                               temp1,
                               optimizations.GetCountIsSourceLength(),
                               /*position_sign_checked=*/ false);

  // Validity checks: dest.
  bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
  CheckSystemArrayCopyPosition(assembler,
                               dest,
                               dest_pos,
                               length,
                               intrinsic_slow_path,
                               temp1,
                               optimizations.GetCountIsDestinationLength(),
                               dest_position_sign_checked);

  auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
    // No read barrier is needed for reading a chain of constant references for comparing
    // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
    // /* HeapReference<Class> */ temp = klass->component_type_
    __ movl(temp, Address(klass, component_offset));
    __ MaybeUnpoisonHeapReference(temp);
    // Check that the component type is not null.
    __ testl(temp, temp);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
    // Check that the component type is not a primitive.
    __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  };

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    if (codegen_->EmitBakerReadBarrier()) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
      // Register `temp1` is not trashed by the read barrier emitted
      // by GenerateFieldLoadWithBakerReadBarrier below, as that
      // method produces a call to a ReadBarrierMarkRegX entry point,
      // which saves all potentially live registers, including
      // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
      // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
      // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp2 = src->klass_
      __ movl(temp2, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp2);
    }

    __ cmpl(temp1, temp2);
    if (optimizations.GetDestinationIsTypedObjectArray()) {
      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
      NearLabel do_copy;
      // For class match, we can skip the source type check regardless of the optimization flag.
      __ j(kEqual, &do_copy);
      // No read barrier is needed for reading a chain of constant references
      // for comparing with null, see `ReadBarrierOption`.
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // No need to unpoison the following heap reference load, as
      // we're comparing against null.
      __ cmpl(Address(temp1, super_offset), Immediate(0));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      // Bail out if the source is not a non primitive array.
      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
        check_non_primitive_array_class(temp2, CpuRegister(TMP));
      }
      __ Bind(&do_copy);
    } else {
      DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
      // For class match, we can skip the array type check completely if at least one of source
      // and destination is known to be a non primitive array, otherwise one check is enough.
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
          !optimizations.GetSourceIsNonPrimitiveArray()) {
        check_non_primitive_array_class(temp2, CpuRegister(TMP));
      }
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    // No read barrier is needed for reading a chain of constant references for comparing
    // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
    // /* HeapReference<Class> */ temp1 = src->klass_
    __ movl(temp1, Address(src, class_offset));
    __ MaybeUnpoisonHeapReference(temp1);
    check_non_primitive_array_class(temp1, CpuRegister(TMP));
  }

  if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
    // Zero constant length: no need to emit the loop code at all.
  } else {
    const DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);
    const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

    // Don't enter copy loop if `length == 0`.
    NearLabel skip_copy_and_write_barrier;
    if (!length.IsConstant()) {
      __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
      __ j(kEqual, &skip_copy_and_write_barrier);
    }

    // Compute base source address, base destination address, and end
    // source address in `temp1`, `temp2` and `temp3` respectively.
    GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
    GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);

    SlowPathCode* read_barrier_slow_path = nullptr;
    if (codegen_->EmitBakerReadBarrier()) {
      // SystemArrayCopy implementation for Baker read barriers (see
      // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
      //
      //   if (src_ptr != end_ptr) {
      //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
      //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
      //     bool is_gray = (rb_state == ReadBarrier::GrayState());
      //     if (is_gray) {
      //       // Slow-path copy.
      //       do {
      //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
      //       } while (src_ptr != end_ptr)
      //     } else {
      //       // Fast-path copy.
      //       do {
      //         *dest_ptr++ = *src_ptr++;
      //       } while (src_ptr != end_ptr)
      //     }
      //   }

      // Given the numeric representation, it's enough to check the low bit of the rb_state.
      static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
      constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
      constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
      constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

      // if (rb_state == ReadBarrier::GrayState())
      //   goto slow_path;
      // At this point, just do the "if" and make sure that flags are preserved until the branch.
      __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

      // Load fence to prevent load-load reordering.
      // Note that this is a no-op, thanks to the x86-64 memory model.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

      // Slow path used to copy array when `src` is gray.
      read_barrier_slow_path =
          new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
      codegen_->AddSlowPath(read_barrier_slow_path);

      // We have done the "if" of the gray bit check above, now branch based on the flags.
      __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
    }

    if (length.IsConstant()) {
      __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ movl(temp3, length.AsRegister<CpuRegister>());
    }

    // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
    DCHECK_EQ(temp1.AsRegister(), RSI);
    DCHECK_EQ(temp2.AsRegister(), RDI);
    DCHECK_EQ(temp3.AsRegister(), RCX);
    __ rep_movsl();

    if (read_barrier_slow_path != nullptr) {
      DCHECK(codegen_->EmitBakerReadBarrier());
      __ Bind(read_barrier_slow_path->GetExitLabel());
    }

    // We only need one card marking on the destination array.
    codegen_->MarkGCCard(temp1, temp2, dest);

    __ Bind(&skip_copy_and_write_barrier);
  }

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

1196   // Request temporary registers; RCX and RDI are needed for the repe_cmpsq instruction.
1197   locations->AddTemp(Location::RegisterLocation(RCX));
1198   locations->AddTemp(Location::RegisterLocation(RDI));
1199 
1200   // Set the output; RSI is needed for the repe_cmpsq instruction anyways.
1201   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1202 }
1203 
VisitStringEquals(HInvoke * invoke)1204 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1205   X86_64Assembler* assembler = GetAssembler();
1206   LocationSummary* locations = invoke->GetLocations();
1207 
1208   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1209   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1210   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1211   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1212   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1213 
1214   NearLabel end, return_true, return_false;
1215 
1216   // Get offsets of count, value, and class fields within a string object.
1217   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1218   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1219   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1220 
1221   // Note that the null check must have been done earlier.
1222   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1223 
1224   StringEqualsOptimizations optimizations(invoke);
1225   if (!optimizations.GetArgumentNotNull()) {
1226     // Check if input is null, return false if it is.
1227     __ testl(arg, arg);
1228     __ j(kEqual, &return_false);
1229   }
1230 
1231   if (!optimizations.GetArgumentIsString()) {
1232     // Instanceof check for the argument by comparing class fields.
1233     // All string objects must have the same type since String cannot be subclassed.
1234     // The receiver must be a string object, so its class field equals every string's class field.
1235     // If the argument is a string object, its class field must equal the receiver's class field.
1236     //
1237     // As the String class is expected to be non-movable, we can read the class
1238     // field from String.equals' arguments without read barriers.
1239     AssertNonMovableStringClass();
1240     // Also, because we use the loaded class references only to compare them, we
1241     // don't need to unpoison them.
1242     // /* HeapReference<Class> */ rcx = str->klass_
1243     __ movl(rcx, Address(str, class_offset));
1244     // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1245     __ cmpl(rcx, Address(arg, class_offset));
1246     __ j(kNotEqual, &return_false);
1247   }
1248 
1249   // Reference equality check, return true if same reference.
1250   __ cmpl(str, arg);
1251   __ j(kEqual, &return_true);
1252 
1253   // Load length and compression flag of receiver string.
1254   __ movl(rcx, Address(str, count_offset));
1255   // Check if the lengths and compression flags are equal; return false if they're not.
1256   // Two identical strings will always have the same compression style since the
1257   // compression style is decided at allocation time.
1258   __ cmpl(rcx, Address(arg, count_offset));
1259   __ j(kNotEqual, &return_false);
1260   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1261   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1262                 "Expecting 0=compressed, 1=uncompressed");
1263   __ jrcxz(&return_true);
1264 
1265   if (mirror::kUseStringCompression) {
1266     NearLabel string_uncompressed;
1267     // Extract the length and branch on whether both strings are compressed or both uncompressed.
1268     // Strings with differing compression styles were already rejected by the count comparison above.
1269     __ shrl(rcx, Immediate(1));
1270     __ j(kCarrySet, &string_uncompressed);
1271     // Divide string length by 2, rounding up, and continue as if uncompressed.
1272     // Merge clearing the compression flag with +1 for rounding.
1273     __ addl(rcx, Immediate(1));
1274     __ shrl(rcx, Immediate(1));
1275     __ Bind(&string_uncompressed);
1276   }
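  // Rough C-like sketch of the count handling above (illustration only; assumes the usual
  // encoding count == (char_count << 1) | is_uncompressed, matching the static_assert above):
  //   uint32_t count = str->count_;             // already known to equal arg->count_ and to be non-zero
  //   uint32_t chars = count >> 1;              // character count without the flag
  //   bool compressed = (count & 1) == 0;
  //   uint32_t len16 = compressed ? (chars + 1) / 2 : chars;  // length in 16-bit units to compare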
1277   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1278   __ leal(rsi, Address(str, value_offset));
1279   __ leal(rdi, Address(arg, value_offset));
1280 
1281   // Divide string length by 4 and adjust for lengths not divisible by 4.
1282   __ addl(rcx, Immediate(3));
1283   __ shrl(rcx, Immediate(2));
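  // Worked example (illustration only): for 5 remaining 16-bit units, the two instructions
  // above compute (5 + 3) >> 2 == 2 eight-byte chunks. The bytes read past the string contents
  // are the zero padding guaranteed by the assertions below, so comparing them is harmless.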
1284 
1285   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1286   // or 8 characters (compressed) at a time.
1287   DCHECK_ALIGNED(value_offset, 8);
1288   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1289 
1290   // Loop to compare the strings eight bytes at a time, starting at the beginning of the string data.
1291   __ repe_cmpsq();
1292   // If strings are not equal, zero flag will be cleared.
1293   __ j(kNotEqual, &return_false);
1294 
1295   // Return true and exit the function.
1296   // If loop does not result in returning false, we return true.
1297   __ Bind(&return_true);
1298   __ movl(rsi, Immediate(1));
1299   __ jmp(&end);
1300 
1301   // Return false and exit the function.
1302   __ Bind(&return_false);
1303   __ xorl(rsi, rsi);
1304   __ Bind(&end);
1305 }
1306 
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1307 static void CreateStringIndexOfLocations(HInvoke* invoke,
1308                                          ArenaAllocator* allocator,
1309                                          bool start_at_zero) {
1310   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1311                                                                LocationSummary::kCallOnSlowPath,
1312                                                                kIntrinsified);
1313   // The data needs to be in RDI for scasw. So request that the string is there, anyways.
1314   locations->SetInAt(0, Location::RegisterLocation(RDI));
1315   // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1316   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1317   // of the instruction explicitly.
1318   // Note: This works as we don't clobber RAX anywhere.
1319   locations->SetInAt(1, Location::RegisterLocation(RAX));
1320   if (!start_at_zero) {
1321     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1322   }
1323   // As we clobber RDI during execution anyways, also use it as the output.
1324   locations->SetOut(Location::SameAsFirstInput());
1325 
1326   // repne scasw uses RCX as the counter.
1327   locations->AddTemp(Location::RegisterLocation(RCX));
1328   // Need another temporary to be able to compute the result.
1329   locations->AddTemp(Location::RequiresRegister());
1330 }
1331 
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1332 static void GenerateStringIndexOf(HInvoke* invoke,
1333                                   X86_64Assembler* assembler,
1334                                   CodeGeneratorX86_64* codegen,
1335                                   bool start_at_zero) {
1336   LocationSummary* locations = invoke->GetLocations();
1337 
1338   // Note that the null check must have been done earlier.
1339   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1340 
1341   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1342   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1343   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1344   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1345   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1346 
1347   // Check our assumptions for registers.
1348   DCHECK_EQ(string_obj.AsRegister(), RDI);
1349   DCHECK_EQ(search_value.AsRegister(), RAX);
1350   DCHECK_EQ(counter.AsRegister(), RCX);
1351   DCHECK_EQ(out.AsRegister(), RDI);
1352 
1353   // Check for code points > 0xFFFF: emit a slow-path check when the value is not known statically,
1354   // dispatch directly to the slow path for a large constant, or omit the check for a small constant or a char.
1355   SlowPathCode* slow_path = nullptr;
1356   HInstruction* code_point = invoke->InputAt(1);
1357   if (code_point->IsIntConstant()) {
1358     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1359         std::numeric_limits<uint16_t>::max()) {
1360       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1361       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1362       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1363       codegen->AddSlowPath(slow_path);
1364       __ jmp(slow_path->GetEntryLabel());
1365       __ Bind(slow_path->GetExitLabel());
1366       return;
1367     }
1368   } else if (code_point->GetType() != DataType::Type::kUint16) {
1369     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1370     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1371     codegen->AddSlowPath(slow_path);
1372     __ j(kAbove, slow_path->GetEntryLabel());
1373   }
1374 
1375   // From here down, we know that we are looking for a char that fits in
1376   // 16 bits (uncompressed) or 8 bits (compressed).
1377   // Location of reference to data array within the String object.
1378   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1379   // Location of count within the String object.
1380   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1381 
1382   // Load the count field of the string containing the length and compression flag.
1383   __ movl(string_length, Address(string_obj, count_offset));
1384 
1385   // Do a zero-length check. Even with string compression `count == 0` means empty.
1386   // TODO: Support jecxz.
1387   NearLabel not_found_label;
1388   __ testl(string_length, string_length);
1389   __ j(kEqual, &not_found_label);
1390 
1391   if (mirror::kUseStringCompression) {
1392     // Use TMP to keep string_length_flagged.
1393     __ movl(CpuRegister(TMP), string_length);
1394     // Shift out the low bit used as the compression flag to get the character count.
1395     __ shrl(string_length, Immediate(1));
1396   }
1397 
1398   if (start_at_zero) {
1399     // Number of chars to scan is the same as the string length.
1400     __ movl(counter, string_length);
1401     // Move to the start of the string.
1402     __ addq(string_obj, Immediate(value_offset));
1403   } else {
1404     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1405 
1406     // Do a start_index check.
1407     __ cmpl(start_index, string_length);
1408     __ j(kGreaterEqual, &not_found_label);
1409 
1410     // Ensure we have a start index >= 0.
1411     __ xorl(counter, counter);
1412     __ cmpl(start_index, Immediate(0));
1413     __ cmov(kGreater, counter, start_index, /* is64bit= */ false);  // 32-bit copy is enough.
1414 
1415     if (mirror::kUseStringCompression) {
1416       NearLabel modify_counter, offset_uncompressed_label;
1417       __ testl(CpuRegister(TMP), Immediate(1));
1418       __ j(kNotZero, &offset_uncompressed_label);
1419       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1420       __ jmp(&modify_counter);
1421       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1422       __ Bind(&offset_uncompressed_label);
1423       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1424       __ Bind(&modify_counter);
1425     } else {
1426       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1427     }
1428     // Now update RCX, the work counter: it becomes string.length - start_index.
1429     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1430     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1431   }
1432 
1433   if (mirror::kUseStringCompression) {
1434     NearLabel uncompressed_string_comparison;
1435     NearLabel comparison_done;
1436     __ testl(CpuRegister(TMP), Immediate(1));
1437     __ j(kNotZero, &uncompressed_string_comparison);
1438     // Check if RAX (search_value) is ASCII.
1439     __ cmpl(search_value, Immediate(127));
1440     __ j(kGreater, &not_found_label);
1441     // Comparing byte-per-byte.
1442     __ repne_scasb();
1443     __ jmp(&comparison_done);
1444     // Everything is set up for repne scasw:
1445     //   * Comparison address in RDI.
1446     //   * Counter in ECX.
1447     __ Bind(&uncompressed_string_comparison);
1448     __ repne_scasw();
1449     __ Bind(&comparison_done);
1450   } else {
1451     __ repne_scasw();
1452   }
1453   // Did we find a match?
1454   __ j(kNotEqual, &not_found_label);
1455 
1456   // Yes, we matched.  Compute the index of the result.
1457   __ subl(string_length, counter);
1458   __ leal(out, Address(string_length, -1));
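  // Worked example (illustration only): repne scasw decrements RCX on every compare, including
  // the matching one. Scanning a 10-char string from index 0 with a match at index 3 performs
  // 4 compares and leaves RCX == 6, so the result is 10 - 6 - 1 == 3.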
1459 
1460   NearLabel done;
1461   __ jmp(&done);
1462 
1463   // Failed to match; return -1.
1464   __ Bind(&not_found_label);
1465   __ movl(out, Immediate(-1));
1466 
1467   // And join up at the end.
1468   __ Bind(&done);
1469   if (slow_path != nullptr) {
1470     __ Bind(slow_path->GetExitLabel());
1471   }
1472 }
1473 
VisitStringIndexOf(HInvoke * invoke)1474 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1475   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1476 }
1477 
VisitStringIndexOf(HInvoke * invoke)1478 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1479   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1480 }
1481 
VisitStringIndexOfAfter(HInvoke * invoke)1482 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1483   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1484 }
1485 
VisitStringIndexOfAfter(HInvoke * invoke)1486 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1487   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1488 }
1489 
VisitStringNewStringFromBytes(HInvoke * invoke)1490 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1491   LocationSummary* locations = new (allocator_) LocationSummary(
1492       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1493   InvokeRuntimeCallingConvention calling_convention;
1494   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1495   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1496   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1497   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1498   locations->SetOut(Location::RegisterLocation(RAX));
1499 }
1500 
VisitStringNewStringFromBytes(HInvoke * invoke)1501 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1502   X86_64Assembler* assembler = GetAssembler();
1503   LocationSummary* locations = invoke->GetLocations();
1504 
1505   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1506   __ testl(byte_array, byte_array);
1507   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1508   codegen_->AddSlowPath(slow_path);
1509   __ j(kEqual, slow_path->GetEntryLabel());
1510 
1511   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1512   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1513   __ Bind(slow_path->GetExitLabel());
1514 }
1515 
VisitStringNewStringFromChars(HInvoke * invoke)1516 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1517   LocationSummary* locations =
1518       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1519   InvokeRuntimeCallingConvention calling_convention;
1520   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1521   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1522   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1523   locations->SetOut(Location::RegisterLocation(RAX));
1524 }
1525 
VisitStringNewStringFromChars(HInvoke * invoke)1526 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1527   // No need to emit code checking whether `locations->InAt(2)` is a null
1528   // pointer, as callers of the native method
1529   //
1530   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1531   //
1532   // all include a null check on `data` before calling that method.
1533   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1534   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1535 }
1536 
VisitStringNewStringFromString(HInvoke * invoke)1537 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1538   LocationSummary* locations = new (allocator_) LocationSummary(
1539       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1540   InvokeRuntimeCallingConvention calling_convention;
1541   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1542   locations->SetOut(Location::RegisterLocation(RAX));
1543 }
1544 
VisitStringNewStringFromString(HInvoke * invoke)1545 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1546   X86_64Assembler* assembler = GetAssembler();
1547   LocationSummary* locations = invoke->GetLocations();
1548 
1549   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1550   __ testl(string_to_copy, string_to_copy);
1551   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1552   codegen_->AddSlowPath(slow_path);
1553   __ j(kEqual, slow_path->GetEntryLabel());
1554 
1555   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1556   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1557   __ Bind(slow_path->GetExitLabel());
1558 }
1559 
VisitStringGetCharsNoCheck(HInvoke * invoke)1560 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1561   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1562   LocationSummary* locations =
1563       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1564   locations->SetInAt(0, Location::RequiresRegister());
1565   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1566   locations->SetInAt(2, Location::RequiresRegister());
1567   locations->SetInAt(3, Location::RequiresRegister());
1568   locations->SetInAt(4, Location::RequiresRegister());
1569 
1570   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1571   locations->AddTemp(Location::RegisterLocation(RSI));
1572   locations->AddTemp(Location::RegisterLocation(RDI));
1573   locations->AddTemp(Location::RegisterLocation(RCX));
1574 }
1575 
VisitStringGetCharsNoCheck(HInvoke * invoke)1576 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1577   X86_64Assembler* assembler = GetAssembler();
1578   LocationSummary* locations = invoke->GetLocations();
1579 
1580   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1581   // Location of data in char array buffer.
1582   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1583   // Location of char array data in string.
1584   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1585 
1586   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1587   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1588   Location srcBegin = locations->InAt(1);
1589   int srcBegin_value =
1590       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1591   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1592   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1593   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1594 
1595   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1596   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1597   DCHECK_EQ(char_size, 2u);
1598 
1599   NearLabel done;
1600   // Compute the number of chars (words) to move.
1601   __ movl(CpuRegister(RCX), srcEnd);
1602   if (srcBegin.IsConstant()) {
1603     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1604   } else {
1605     DCHECK(srcBegin.IsRegister());
1606     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1607   }
1608   if (mirror::kUseStringCompression) {
1609     NearLabel copy_uncompressed, copy_loop;
1610     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1611     DCHECK_EQ(c_char_size, 1u);
1612     // Location of count in string.
1613     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1614 
1615     __ testl(Address(obj, count_offset), Immediate(1));
1616     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1617                   "Expecting 0=compressed, 1=uncompressed");
1618     __ j(kNotZero, &copy_uncompressed);
1619     // Compute the source address: the string's value offset plus srcBegin bytes
1620     // (one byte per character in a compressed string).
1621     __ leaq(CpuRegister(RSI),
1622             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1623     // Compute the address of the destination char array buffer for the copy loop below.
1624     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1625 
1626     __ Bind(&copy_loop);
1627     __ jrcxz(&done);
1628     // Use TMP as temporary (convert byte from RSI to word).
1629     // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1630     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1631     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1632     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1633     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1634     // TODO: Add support for LOOP to X86_64Assembler.
1635     __ subl(CpuRegister(RCX), Immediate(1));
1636     __ jmp(&copy_loop);
1637 
1638     __ Bind(&copy_uncompressed);
1639   }
1640 
1641   __ leaq(CpuRegister(RSI),
1642           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1643   // Compute the address of the destination buffer.
1644   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1645   // Do the move.
1646   __ rep_movsw();
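  // Illustration only: the uncompressed path above is effectively
  //   memcpy(dst_data + dstBegin, src_value + srcBegin, (srcEnd - srcBegin) * sizeof(uint16_t));
  // while the compressed loop earlier widens one byte per character, roughly
  //   dst_data[dstBegin + i] = static_cast<uint16_t>(static_cast<uint8_t>(src_value[srcBegin + i]));
  // (dst_data and src_value are illustrative names for the raw array and string payloads).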
1647 
1648   __ Bind(&done);
1649 }
1650 
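// Illustration only: GenPeek below boils down to a single (possibly unaligned) load from a raw
// address, e.g. for kInt32 roughly `out = *reinterpret_cast<const int32_t*>(address);`, relying
// on x86-64 tolerating unaligned scalar accesses.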
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1651 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1652   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1653   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1654   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1655   // to avoid a SIGBUS.
1656   switch (size) {
1657     case DataType::Type::kInt8:
1658       __ movsxb(out, Address(address, 0));
1659       break;
1660     case DataType::Type::kInt16:
1661       __ movsxw(out, Address(address, 0));
1662       break;
1663     case DataType::Type::kInt32:
1664       __ movl(out, Address(address, 0));
1665       break;
1666     case DataType::Type::kInt64:
1667       __ movq(out, Address(address, 0));
1668       break;
1669     default:
1670       LOG(FATAL) << "Type not recognized for peek: " << size;
1671       UNREACHABLE();
1672   }
1673 }
1674 
VisitMemoryPeekByte(HInvoke * invoke)1675 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1676   CreateIntToIntLocations(allocator_, invoke);
1677 }
1678 
VisitMemoryPeekByte(HInvoke * invoke)1679 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1680   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1681 }
1682 
VisitMemoryPeekIntNative(HInvoke * invoke)1683 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1684   CreateIntToIntLocations(allocator_, invoke);
1685 }
1686 
VisitMemoryPeekIntNative(HInvoke * invoke)1687 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1688   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1689 }
1690 
VisitMemoryPeekLongNative(HInvoke * invoke)1691 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1692   CreateIntToIntLocations(allocator_, invoke);
1693 }
1694 
VisitMemoryPeekLongNative(HInvoke * invoke)1695 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1696   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1697 }
1698 
VisitMemoryPeekShortNative(HInvoke * invoke)1699 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1700   CreateIntToIntLocations(allocator_, invoke);
1701 }
1702 
VisitMemoryPeekShortNative(HInvoke * invoke)1703 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1704   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1705 }
1706 
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1707 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1708   LocationSummary* locations =
1709       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1710   locations->SetInAt(0, Location::RequiresRegister());
1711   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1712 }
1713 
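// Illustration only: GenPoke below is the store-side counterpart, e.g. for kInt64 roughly
// `*reinterpret_cast<int64_t*>(address) = value;`, with constant values emitted as sign-extended
// 32-bit immediates when they fit (see the IsInt<32> check in the kInt64 case).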
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1714 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1715   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1716   Location value = locations->InAt(1);
1717   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1718   // to avoid a SIGBUS.
1719   switch (size) {
1720     case DataType::Type::kInt8:
1721       if (value.IsConstant()) {
1722         __ movb(Address(address, 0),
1723                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1724       } else {
1725         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1726       }
1727       break;
1728     case DataType::Type::kInt16:
1729       if (value.IsConstant()) {
1730         __ movw(Address(address, 0),
1731                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1732       } else {
1733         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1734       }
1735       break;
1736     case DataType::Type::kInt32:
1737       if (value.IsConstant()) {
1738         __ movl(Address(address, 0),
1739                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1740       } else {
1741         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1742       }
1743       break;
1744     case DataType::Type::kInt64:
1745       if (value.IsConstant()) {
1746         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1747         DCHECK(IsInt<32>(v));
1748         int32_t v_32 = v;
1749         __ movq(Address(address, 0), Immediate(v_32));
1750       } else {
1751         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1752       }
1753       break;
1754     default:
1755       LOG(FATAL) << "Type not recognized for poke: " << size;
1756       UNREACHABLE();
1757   }
1758 }
1759 
VisitMemoryPokeByte(HInvoke * invoke)1760 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1761   CreateIntIntToVoidLocations(allocator_, invoke);
1762 }
1763 
VisitMemoryPokeByte(HInvoke * invoke)1764 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1765   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1766 }
1767 
VisitMemoryPokeIntNative(HInvoke * invoke)1768 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1769   CreateIntIntToVoidLocations(allocator_, invoke);
1770 }
1771 
VisitMemoryPokeIntNative(HInvoke * invoke)1772 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1773   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1774 }
1775 
VisitMemoryPokeLongNative(HInvoke * invoke)1776 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1777   CreateIntIntToVoidLocations(allocator_, invoke);
1778 }
1779 
VisitMemoryPokeLongNative(HInvoke * invoke)1780 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1781   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1782 }
1783 
VisitMemoryPokeShortNative(HInvoke * invoke)1784 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1785   CreateIntIntToVoidLocations(allocator_, invoke);
1786 }
1787 
VisitMemoryPokeShortNative(HInvoke * invoke)1788 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1789   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1790 }
1791 
VisitThreadCurrentThread(HInvoke * invoke)1792 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1793   LocationSummary* locations =
1794       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1795   locations->SetOut(Location::RequiresRegister());
1796 }
1797 
VisitThreadCurrentThread(HInvoke * invoke)1798 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1799   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
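  // Sketch (illustration only): with GS holding the runtime Thread pointer on x86-64, the load
  // below is roughly
  //   out = *reinterpret_cast<uint32_t*>(gs_base + Thread::PeerOffset<kX86_64PointerSize>().Int32Value());
  // i.e. the 32-bit heap reference to the current thread's java.lang.Thread peer (gs_base is an
  // illustrative name for the GS segment base).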
1800   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1801                                                     /* no_rip= */ true));
1802 }
1803 
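// Illustration only: without read barriers, GenUnsafeGet below is a plain load from base + offset,
// e.g. for kInt32 roughly
//   int32_t v = *reinterpret_cast<int32_t*>(reinterpret_cast<uint8_t*>(base) + offset);
// The is_volatile flag is unused because ordinary x86-64 loads already provide the required
// acquire ordering.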
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1804 static void GenUnsafeGet(HInvoke* invoke,
1805                          DataType::Type type,
1806                          [[maybe_unused]] bool is_volatile,
1807                          CodeGeneratorX86_64* codegen) {
1808   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1809   LocationSummary* locations = invoke->GetLocations();
1810   Location base_loc = locations->InAt(1);
1811   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1812   Location offset_loc = locations->InAt(2);
1813   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1814   Location output_loc = locations->Out();
1815   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1816 
1817   switch (type) {
1818     case DataType::Type::kInt8:
1819       __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1820       break;
1821 
1822     case DataType::Type::kInt32:
1823       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1824       break;
1825 
1826     case DataType::Type::kReference: {
1827       if (codegen->EmitReadBarrier()) {
1828         if (kUseBakerReadBarrier) {
1829           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1830           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1831               invoke, output_loc, base, src, /* needs_null_check= */ false);
1832         } else {
1833           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1834           codegen->GenerateReadBarrierSlow(
1835               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1836         }
1837       } else {
1838         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1839         __ MaybeUnpoisonHeapReference(output);
1840       }
1841       break;
1842     }
1843 
1844     case DataType::Type::kInt64:
1845       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1846       break;
1847 
1848     default:
1849       LOG(FATAL) << "Unsupported op size " << type;
1850       UNREACHABLE();
1851   }
1852 }
1853 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1854 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1855                                           HInvoke* invoke,
1856                                           CodeGeneratorX86_64* codegen) {
1857   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1858   LocationSummary* locations =
1859       new (allocator) LocationSummary(invoke,
1860                                       can_call
1861                                           ? LocationSummary::kCallOnSlowPath
1862                                           : LocationSummary::kNoCall,
1863                                       kIntrinsified);
1864   if (can_call && kUseBakerReadBarrier) {
1865     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1866   }
1867   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1868   locations->SetInAt(1, Location::RequiresRegister());
1869   locations->SetInAt(2, Location::RequiresRegister());
1870   locations->SetOut(Location::RequiresRegister(),
1871                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1872 }
1873 
VisitUnsafeGet(HInvoke * invoke)1874 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1875   VisitJdkUnsafeGet(invoke);
1876 }
VisitUnsafeGetVolatile(HInvoke * invoke)1877 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1878   VisitJdkUnsafeGetVolatile(invoke);
1879 }
VisitUnsafeGetLong(HInvoke * invoke)1880 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1881   VisitJdkUnsafeGetLong(invoke);
1882 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1883 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1884   VisitJdkUnsafeGetLongVolatile(invoke);
1885 }
VisitUnsafeGetObject(HInvoke * invoke)1886 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1887   VisitJdkUnsafeGetReference(invoke);
1888 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1889 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1890   VisitJdkUnsafeGetReferenceVolatile(invoke);
1891 }
VisitUnsafeGetByte(HInvoke * invoke)1892 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1893   VisitJdkUnsafeGetByte(invoke);
1894 }
1895 
VisitJdkUnsafeGet(HInvoke * invoke)1896 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1897   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1898 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1899 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1900   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1901 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1902 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1903   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1904 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1905 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1906   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1907 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1908 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1909   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1910 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1911 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1912   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1913 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1914 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1915   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1916 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1917 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1918   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1919 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1920 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1921   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1922 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1923 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1924   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1925 }
1926 
VisitUnsafeGet(HInvoke * invoke)1927 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1928   VisitJdkUnsafeGet(invoke);
1929 }
VisitUnsafeGetVolatile(HInvoke * invoke)1930 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1931   VisitJdkUnsafeGetVolatile(invoke);
1932 }
VisitUnsafeGetLong(HInvoke * invoke)1933 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1934   VisitJdkUnsafeGetLong(invoke);
1935 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1936 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1937   VisitJdkUnsafeGetLongVolatile(invoke);
1938 }
VisitUnsafeGetObject(HInvoke * invoke)1939 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1940   VisitJdkUnsafeGetReference(invoke);
1941 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1942 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1943   VisitJdkUnsafeGetReferenceVolatile(invoke);
1944 }
VisitUnsafeGetByte(HInvoke * invoke)1945 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1946   VisitJdkUnsafeGetByte(invoke);
1947 }
1948 
VisitJdkUnsafeGet(HInvoke * invoke)1949 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1950   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1951 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1952 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1953   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1954 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1955 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1956   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1957 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1958 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1959   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1960 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1961 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1962   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1963 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1964 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1965   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1966 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1967 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1968   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1969 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1970 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1971   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1972 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1973 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1974   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1975 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1976 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1977   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
1978 }
1979 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1980 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1981                                                        DataType::Type type,
1982                                                        HInvoke* invoke) {
1983   LocationSummary* locations =
1984       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1985   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1986   locations->SetInAt(1, Location::RequiresRegister());
1987   locations->SetInAt(2, Location::RequiresRegister());
1988   locations->SetInAt(3, Location::RequiresRegister());
1989   if (type == DataType::Type::kReference) {
1990     // Need temp registers for card-marking.
1991     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1992     locations->AddTemp(Location::RequiresRegister());
1993   }
1994 }
1995 
VisitUnsafePut(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
1997   VisitJdkUnsafePut(invoke);
1998 }
VisitUnsafePutOrdered(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2000   VisitJdkUnsafePutOrdered(invoke);
2001 }
VisitUnsafePutVolatile(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2003   VisitJdkUnsafePutVolatile(invoke);
2004 }
VisitUnsafePutObject(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2006   VisitJdkUnsafePutReference(invoke);
2007 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2009   VisitJdkUnsafePutObjectOrdered(invoke);
2010 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2012   VisitJdkUnsafePutReferenceVolatile(invoke);
2013 }
VisitUnsafePutLong(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2015   VisitJdkUnsafePutLong(invoke);
2016 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2018   VisitJdkUnsafePutLongOrdered(invoke);
2019 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2021   VisitJdkUnsafePutLongVolatile(invoke);
2022 }
VisitUnsafePutByte(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2024   VisitJdkUnsafePut(invoke);
2025 }
2026 
VisitJdkUnsafePut(HInvoke * invoke)2027 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2028   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2029 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2030 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2031   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2032 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2033 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2034   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2035 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2036 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2037   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2038 }
VisitJdkUnsafePutReference(HInvoke * invoke)2039 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2040   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2041 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2042 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2043   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2044 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2045 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2046   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2047 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2048 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2049   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2050 }
VisitJdkUnsafePutLong(HInvoke * invoke)2051 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2052   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2053 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2054 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2055   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2056 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2057 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2058   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2059 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2060 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2061   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2062 }
VisitJdkUnsafePutByte(HInvoke * invoke)2063 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2064   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2065 }
2066 
2067 // Ordered puts need no special handling: they only require an AnyStore barrier, which the
2068 // x86-64 memory model already provides for ordinary stores.
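// Illustration only: ignoring reference poisoning and card marking, GenUnsafePut below is roughly
//   *reinterpret_cast<int32_t*>(reinterpret_cast<uint8_t*>(base) + offset) = value;
//   if (is_volatile) codegen->MemoryFence();  // StoreLoad barrier for the volatile/release cases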
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2069 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2070                          CodeGeneratorX86_64* codegen) {
2071   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2072   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2073   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2074   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2075 
2076   if (type == DataType::Type::kInt64) {
2077     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2078   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2079     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2080     __ movl(temp, value);
2081     __ PoisonHeapReference(temp);
2082     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2083   } else {
2084     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2085   }
2086 
2087   if (is_volatile) {
2088     codegen->MemoryFence();
2089   }
2090 
2091   if (type == DataType::Type::kReference) {
2092     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2093     codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2094                              locations->GetTemp(1).AsRegister<CpuRegister>(),
2095                              base,
2096                              value,
2097                              value_can_be_null);
2098   }
2099 }
2100 
VisitUnsafePut(HInvoke * invoke)2101 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2102   VisitJdkUnsafePut(invoke);
2103 }
VisitUnsafePutOrdered(HInvoke * invoke)2104 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2105   VisitJdkUnsafePutOrdered(invoke);
2106 }
VisitUnsafePutVolatile(HInvoke * invoke)2107 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2108   VisitJdkUnsafePutVolatile(invoke);
2109 }
VisitUnsafePutObject(HInvoke * invoke)2110 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2111   VisitJdkUnsafePutReference(invoke);
2112 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2113 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2114   VisitJdkUnsafePutObjectOrdered(invoke);
2115 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2116 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2117   VisitJdkUnsafePutReferenceVolatile(invoke);
2118 }
VisitUnsafePutLong(HInvoke * invoke)2119 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2120   VisitJdkUnsafePutLong(invoke);
2121 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2122 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2123   VisitJdkUnsafePutLongOrdered(invoke);
2124 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2125 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2126   VisitJdkUnsafePutLongVolatile(invoke);
2127 }
VisitUnsafePutByte(HInvoke * invoke)2128 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2129   VisitJdkUnsafePutByte(invoke);
2130 }
2131 
VisitJdkUnsafePut(HInvoke * invoke)2132 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2133   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2134 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2135 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2136   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2137 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2138 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2139   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2140 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2141 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2142   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2143 }
VisitJdkUnsafePutReference(HInvoke * invoke)2144 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2145   GenUnsafePut(
2146       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2147 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2148 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2149   GenUnsafePut(
2150       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2151 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2152 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2153   GenUnsafePut(
2154       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2155 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2156 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2157   GenUnsafePut(
2158       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2159 }
VisitJdkUnsafePutLong(HInvoke * invoke)2160 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2161   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2162 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2163 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2164   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2165 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2166 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2167   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2168 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2169 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2170   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2171 }
VisitJdkUnsafePutByte(HInvoke * invoke)2172 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2173   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2174 }
2175 
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)2176 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2177                                      HInvoke* invoke,
2178                                      CodeGeneratorX86_64* codegen,
2179                                      DataType::Type type) {
2180   const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2181   LocationSummary* locations =
2182       new (allocator) LocationSummary(invoke,
2183                                       can_call
2184                                           ? LocationSummary::kCallOnSlowPath
2185                                           : LocationSummary::kNoCall,
2186                                       kIntrinsified);
2187   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2188   locations->SetInAt(1, Location::RequiresRegister());
2189   locations->SetInAt(2, Location::RequiresRegister());
2190   // The expected value must be in EAX/RAX, the implicit operand of CMPXCHG.
2191   locations->SetInAt(3, Location::RegisterLocation(RAX));
2192   locations->SetInAt(4, Location::RequiresRegister());
2193 
2194   // RAX is clobbered by CMPXCHG, but since it is also the output there is no need to add it as a temporary.
2195   locations->SetOut(Location::RegisterLocation(RAX));
2196 
2197   if (type == DataType::Type::kReference) {
2198     // Need two temporaries for MarkGCCard.
2199     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2200     locations->AddTemp(Location::RequiresRegister());
2201     if (codegen->EmitReadBarrier()) {
2202       // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2203       DCHECK(kUseBakerReadBarrier);
2204       locations->AddTemp(Location::RequiresRegister());
2205     }
2206   }
2207 }
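
// Note on the fixed register assignments above: LOCK CMPXCHG implicitly compares RAX (or its
// narrower aliases) with the memory operand and, on failure, writes the old memory value back
// into RAX. Pinning input 3 (the expected value) and the output to RAX lets the generated code
// use a single instruction. An illustrative sketch of the emitted pattern (not literal output):
//
//   // expected value is already in RAX due to the fixed location above
//   lock cmpxchgq [obj + offset], new_value
//   setz  al                      // compare-and-set result (see GenZFlagToResult below)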
2208 
VisitUnsafeCASInt(HInvoke * invoke)2209 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2210   VisitJdkUnsafeCASInt(invoke);
2211 }
2212 
VisitUnsafeCASLong(HInvoke * invoke)2213 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2214   VisitJdkUnsafeCASLong(invoke);
2215 }
2216 
VisitUnsafeCASObject(HInvoke * invoke)2217 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2218   VisitJdkUnsafeCASObject(invoke);
2219 }
2220 
VisitJdkUnsafeCASInt(HInvoke * invoke)2221 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2222   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2223   VisitJdkUnsafeCompareAndSetInt(invoke);
2224 }
2225 
VisitJdkUnsafeCASLong(HInvoke * invoke)2226 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2227   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2228   VisitJdkUnsafeCompareAndSetLong(invoke);
2229 }
2230 
VisitJdkUnsafeCASObject(HInvoke * invoke)2231 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2232   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2233   VisitJdkUnsafeCompareAndSetReference(invoke);
2234 }
2235 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2236 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2237   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
2238 }
2239 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2240 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2241   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
2242 }
2243 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2244 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2245   // The only supported read barrier implementation is the Baker-style read barriers.
2246   if (codegen_->EmitNonBakerReadBarrier()) {
2247     return;
2248   }
2249 
2250   CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
2251 }
2252 
2253 // Convert ZF into the Boolean result.
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2254 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2255   __ setcc(kZero, out);
2256   __ movzxb(out, out);
2257 }
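
// For reference, the two instructions emitted above are roughly:
//
//   setz  out_b        // out_b = (ZF != 0) ? 1 : 0, writes only the low byte
//   movzx out, out_b   // zero-extend so the full register holds the Boolean 0/1
//
// (Illustrative sketch; the concrete register depends on the allocation of `out`.)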
2258 
2259 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2260 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2261                                           DataType::Type type,
2262                                           Address field_addr,
2263                                           Location value,
2264                                           bool is_cmpxchg,
2265                                           bool byte_swap) {
2266   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2267   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2268 
2269   if (byte_swap) {
2270     instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2271     instr_codegen->Bswap(value, type);
2272   }
2273 
2274   switch (type) {
2275     case DataType::Type::kBool:
2276     case DataType::Type::kInt8:
2277       __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2278       break;
2279     case DataType::Type::kInt16:
2280     case DataType::Type::kUint16:
2281       __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2282       break;
2283     case DataType::Type::kInt32:
2284     case DataType::Type::kUint32:
2285       __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2286       break;
2287     case DataType::Type::kInt64:
2288     case DataType::Type::kUint64:
2289       __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2290       break;
2291     default:
2292       LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2293   }
2294   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2295 
2296   if (byte_swap) {
2297     // Restore byte order for value.
2298     instr_codegen->Bswap(value, type);
2299   }
2300 
2301   CpuRegister rax(RAX);
2302   if (is_cmpxchg) {
2303     if (byte_swap) {
2304       instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2305     }
2306     // Sign-extend or zero-extend the result as necessary.
2307     switch (type) {
2308       case DataType::Type::kBool:
2309         __ movzxb(rax, rax);
2310         break;
2311       case DataType::Type::kInt8:
2312         __ movsxb(rax, rax);
2313         break;
2314       case DataType::Type::kInt16:
2315         __ movsxw(rax, rax);
2316         break;
2317       case DataType::Type::kUint16:
2318         __ movzxw(rax, rax);
2319         break;
2320       default:
2321         break;  // No need to do anything.
2322     }
2323   } else {
2324     GenZFlagToResult(assembler, rax);
2325   }
2326 }
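
// Conceptually, the integral path above implements the following (sketch in std::atomic terms;
// the authoritative semantics are those of the Java Unsafe/VarHandle APIs):
//
//   bool CompareAndSet(std::atomic<T>& field, T expected, T new_value) {
//     return field.compare_exchange_strong(expected, new_value);   // ZF -> Boolean
//   }
//   T CompareAndExchange(std::atomic<T>& field, T expected, T new_value) {
//     field.compare_exchange_strong(expected, new_value);
//     return expected;  // CMPXCHG leaves the witnessed value in RAX on failure.
//   }
//
// When `byte_swap` is set, the byte order of the expected and new values is reversed before the
// CMPXCHG, for callers that request a byte order different from the native one.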
2327 
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2328 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2329                                          Address field_addr,
2330                                          CpuRegister temp,
2331                                          Location value,
2332                                          Location expected,
2333                                          Location out,
2334                                          bool is64bit,
2335                                          bool is_cmpxchg,
2336                                          bool byte_swap) {
2337   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2338   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2339 
2340   Location rax_loc = Location::RegisterLocation(RAX);
2341   Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2342 
2343   DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2344 
2345   // Copy `expected` to RAX (required by the CMPXCHG instruction).
2346   codegen->Move(rax_loc, expected);
2347 
2348   // Copy value to some other register (ensure it's not RAX).
2349   DCHECK_NE(temp.AsRegister(), RAX);
2350   codegen->Move(temp_loc, value);
2351 
2352   if (byte_swap) {
2353     instr_codegen->Bswap(rax_loc, type);
2354     instr_codegen->Bswap(temp_loc, type);
2355   }
2356 
2357   if (is64bit) {
2358     __ LockCmpxchgq(field_addr, temp);
2359   } else {
2360     __ LockCmpxchgl(field_addr, temp);
2361   }
2362   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2363   // No need to restore byte order for temporary register.
2364 
2365   if (is_cmpxchg) {
2366     if (byte_swap) {
2367       instr_codegen->Bswap(rax_loc, type);
2368     }
2369     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2370   } else {
2371     GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2372   }
2373 }
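
// Sketch of the floating-point path above: the expected and new values are moved into general
// purpose registers and compared/stored as raw bit patterns, roughly:
//
//   rax  = bit_cast<uint64_t>(expected);    // via codegen->Move()
//   temp = bit_cast<uint64_t>(new_value);
//   lock cmpxchgq [obj + offset], temp
//   // compareAndExchange: movq xmm_out, rax  (move the witnessed bits back to an XMM register)
//   // compareAndSet:      setz/movzx on the integer out register
//
// Because the comparison is on raw bits, distinct NaN encodings or +0.0 vs -0.0 are treated as
// different values.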
2374 
2375 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64 * codegen,HInvoke * invoke,CpuRegister base,CpuRegister offset,CpuRegister value,CpuRegister temp1,CpuRegister temp2,CpuRegister temp3,bool is_cmpxchg)2376 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2377                                           HInvoke* invoke,
2378                                           CpuRegister base,
2379                                           CpuRegister offset,
2380                                           CpuRegister value,
2381                                           CpuRegister temp1,
2382                                           CpuRegister temp2,
2383                                           CpuRegister temp3,
2384                                           bool is_cmpxchg) {
2385   // The only supported read barrier implementation is the Baker-style read barriers.
2386   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2387 
2388   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2389 
2390   // Mark card for object assuming new value is stored.
2391   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2392   codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);
2393 
2394   Address field_addr(base, offset, TIMES_1, 0);
2395   if (codegen->EmitBakerReadBarrier()) {
2396     // Need to make sure the reference stored in the field is a to-space
2397     // one before attempting the CAS or the CAS could fail incorrectly.
2398     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2399         invoke,
2400         Location::RegisterLocation(temp3.AsRegister()),
2401         base,
2402         field_addr,
2403         /* needs_null_check= */ false,
2404         /* always_update_field= */ true,
2405         &temp1,
2406         &temp2);
2407   } else {
2408     // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2409   }
2410 
2411   bool base_equals_value = (base.AsRegister() == value.AsRegister());
2412   Register value_reg = value.AsRegister();
2413   if (kPoisonHeapReferences) {
2414     if (base_equals_value) {
2415       // If `base` and `value` are the same register location, move `value_reg` to a temporary
2416       // register.  This way, poisoning `value_reg` won't invalidate `base`.
2417       value_reg = temp1.AsRegister();
2418       __ movl(CpuRegister(value_reg), base);
2419     }
2420 
2421     // Check that the register allocator did not assign the location of expected value (RAX) to
2422     // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2423     // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2424     //   not be poisoned at all, as heap poisoning uses address negation.
2425     // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2426     DCHECK_NE(RAX, value_reg);
2427     DCHECK_NE(RAX, base.AsRegister());
2428 
2429     __ PoisonHeapReference(CpuRegister(RAX));
2430     __ PoisonHeapReference(CpuRegister(value_reg));
2431   }
2432 
2433   __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2434   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2435 
2436   if (is_cmpxchg) {
2437     // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2438     __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2439   } else {
2440     GenZFlagToResult(assembler, CpuRegister(RAX));
2441   }
2442 
2443   // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2444   if (kPoisonHeapReferences) {
2445     if (base_equals_value) {
2446       // `value_reg` has been moved to a temporary register; no need to unpoison it.
2447     } else {
2448       // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2449       DCHECK_NE(RAX, value_reg);
2450       __ UnpoisonHeapReference(CpuRegister(value_reg));
2451     }
2452   }
2453 }
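
// A small worked example of the poisoning invariant relied on above: with heap poisoning, a
// reference r is stored in the heap as its negation, so poison(r) == -r and
// poison(poison(r)) == r. That is why poisoning RAX twice (if `value` aliased RAX) would
// silently cancel out, and why the DCHECKs insist that neither `value` nor `base` is RAX.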
2454 
2455 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2456 // and always return true.
RegsAreAllDifferent(const std::vector<CpuRegister> & regs)2457 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2458   if (kIsDebugBuild) {
2459     for (size_t i = 0; i < regs.size(); ++i) {
2460       for (size_t j = 0; j < i; ++j) {
2461         if (regs[i].AsRegister() == regs[j].AsRegister()) {
2462           return false;
2463         }
2464       }
2465     }
2466   }
2467   return true;
2468 }
2469 
2470 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2471 // temporary indices that may not correspond to real registers for code paths that do not use them.
GenCompareAndSetOrExchange(CodeGeneratorX86_64 * codegen,HInvoke * invoke,DataType::Type type,CpuRegister base,CpuRegister offset,uint32_t temp1_index,uint32_t temp2_index,uint32_t temp3_index,Location new_value,Location expected,Location out,bool is_cmpxchg,bool byte_swap)2472 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2473                                        HInvoke* invoke,
2474                                        DataType::Type type,
2475                                        CpuRegister base,
2476                                        CpuRegister offset,
2477                                        uint32_t temp1_index,
2478                                        uint32_t temp2_index,
2479                                        uint32_t temp3_index,
2480                                        Location new_value,
2481                                        Location expected,
2482                                        Location out,
2483                                        bool is_cmpxchg,
2484                                        bool byte_swap) {
2485   LocationSummary* locations = invoke->GetLocations();
2486   Address field_address(base, offset, TIMES_1, 0);
2487 
2488   if (DataType::IsFloatingPointType(type)) {
2489     bool is64bit = (type == DataType::Type::kFloat64);
2490     CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2491     DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2492 
2493     GenCompareAndSetOrExchangeFP(
2494         codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2495   } else {
2496     // Both the expected value for CMPXCHG and the output are in RAX.
2497     DCHECK_EQ(RAX, expected.AsRegister<Register>());
2498     DCHECK_EQ(RAX, out.AsRegister<Register>());
2499 
2500     if (type == DataType::Type::kReference) {
2501       CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2502       CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2503       CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2504       CpuRegister temp3 = codegen->EmitReadBarrier()
2505           ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2506           : CpuRegister(kNoRegister);
2507       DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2508 
2509       DCHECK(!byte_swap);
2510       GenCompareAndSetOrExchangeRef(
2511           codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2512     } else {
2513       GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2514     }
2515   }
2516 }
2517 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2518 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2519   LocationSummary* locations = invoke->GetLocations();
2520   GenCompareAndSetOrExchange(codegen,
2521                              invoke,
2522                              type,
2523                              /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2524                              /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2525                              /*temp1_index=*/ 0,
2526                              /*temp2_index=*/ 1,
2527                              /*temp3_index=*/ 2,
2528                              /*new_value=*/ locations->InAt(4),
2529                              /*expected=*/ locations->InAt(3),
2530                              locations->Out(),
2531                              /*is_cmpxchg=*/ false,
2532                              /*byte_swap=*/ false);
2533 }
2534 
VisitUnsafeCASInt(HInvoke * invoke)2535 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2536   VisitJdkUnsafeCASInt(invoke);
2537 }
2538 
VisitUnsafeCASLong(HInvoke * invoke)2539 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2540   VisitJdkUnsafeCASLong(invoke);
2541 }
2542 
VisitUnsafeCASObject(HInvoke * invoke)2543 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2544   VisitJdkUnsafeCASObject(invoke);
2545 }
2546 
VisitJdkUnsafeCASInt(HInvoke * invoke)2547 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2548   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2549   VisitJdkUnsafeCompareAndSetInt(invoke);
2550 }
2551 
VisitJdkUnsafeCASLong(HInvoke * invoke)2552 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2553   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2554   VisitJdkUnsafeCompareAndSetLong(invoke);
2555 }
2556 
VisitJdkUnsafeCASObject(HInvoke * invoke)2557 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2558   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2559   VisitJdkUnsafeCompareAndSetReference(invoke);
2560 }
2561 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2562 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2563   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2564 }
2565 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2566 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2567   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2568 }
2569 
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2570 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2571   // The only supported read barrier implementation is the Baker-style read barriers.
2572   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2573 
2574   GenCAS(DataType::Type::kReference, invoke, codegen_);
2575 }
2576 
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2577 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2578                                               HInvoke* invoke,
2579                                               CodeGeneratorX86_64* codegen) {
2580   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2581   LocationSummary* locations =
2582       new (allocator) LocationSummary(invoke,
2583                                       can_call
2584                                           ? LocationSummary::kCallOnSlowPath
2585                                           : LocationSummary::kNoCall,
2586                                       kIntrinsified);
2587   if (can_call && kUseBakerReadBarrier) {
2588     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2589   }
2590   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2591   locations->SetInAt(1, Location::RequiresRegister());
2592   locations->SetInAt(2, Location::RequiresRegister());
2593   // Use the same register for both the output and the new value or addend
2594   // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
2595   locations->SetInAt(3, Location::RegisterLocation(RAX));
2596   locations->SetOut(Location::RegisterLocation(RAX));
2597 }
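
// Rationale for sharing RAX between input 3 and the output: both XCHG and LOCK XADD read the
// register operand and write the previous memory value back into that same register, so a
// single instruction produces the result in place. A rough sketch:
//
//   lock xaddl [obj + offset], eax   // getAndAdd: eax <- old value, memory <- old + addend
//   xchgl      eax, [obj + offset]   // getAndSet: eax <- old value, memory <- new value
//
// (XCHG with a memory operand is implicitly locked; RAX is an arbitrary choice, as noted above.)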
2598 
VisitUnsafeGetAndAddInt(HInvoke * invoke)2599 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2600   VisitJdkUnsafeGetAndAddInt(invoke);
2601 }
2602 
VisitUnsafeGetAndAddLong(HInvoke * invoke)2603 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2604   VisitJdkUnsafeGetAndAddLong(invoke);
2605 }
2606 
VisitUnsafeGetAndSetInt(HInvoke * invoke)2607 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2608   VisitJdkUnsafeGetAndSetInt(invoke);
2609 }
2610 
VisitUnsafeGetAndSetLong(HInvoke * invoke)2611 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2612   VisitJdkUnsafeGetAndSetLong(invoke);
2613 }
2614 
VisitUnsafeGetAndSetObject(HInvoke * invoke)2615 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2616   VisitJdkUnsafeGetAndSetReference(invoke);
2617 }
2618 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2619 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2620   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2621 }
2622 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2623 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2624   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2625 }
2626 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2627 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2628   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2629 }
2630 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2631 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2632   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2633 }
2634 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2635 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2636   // The only supported read barrier implementation is the Baker-style read barriers.
2637   if (codegen_->EmitNonBakerReadBarrier()) {
2638     return;
2639   }
2640 
2641   CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2642   invoke->GetLocations()->AddRegisterTemps(3);
2643 }
2644 
2645 enum class GetAndUpdateOp {
2646   kSet,
2647   kAdd,
2648   kBitwiseAnd,
2649   kBitwiseOr,
2650   kBitwiseXor
2651 };
2652 
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen,GetAndUpdateOp get_and_update_op)2653 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2654                                   DataType::Type type,
2655                                   CodeGeneratorX86_64* codegen,
2656                                   GetAndUpdateOp get_and_update_op) {
2657   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2658   LocationSummary* locations = invoke->GetLocations();
2659 
2660   CpuRegister out = locations->Out().AsRegister<CpuRegister>();       // Result.
2661   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();    // Object pointer.
2662   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();  // Long offset.
2663   DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>());       // New value or addend.
2664   Address field_address(base, offset, TIMES_1, 0);
2665 
2666   if (type == DataType::Type::kInt32) {
2667     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2668       __ LockXaddl(field_address, out);
2669     } else {
2670       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2671       __ xchgl(out, field_address);
2672     }
2673   } else if (type == DataType::Type::kInt64) {
2674     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2675       __ LockXaddq(field_address, out);
2676     } else {
2677       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2678       __ xchgq(out, field_address);
2679     }
2680   } else {
2681     DCHECK_EQ(type, DataType::Type::kReference);
2682     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2683     CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2684     CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2685     CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
2686 
2687     if (codegen->EmitReadBarrier()) {
2688       DCHECK(kUseBakerReadBarrier);
2689       // Ensure that the field contains a to-space reference.
2690       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2691           invoke,
2692           Location::RegisterLocation(temp3.AsRegister()),
2693           base,
2694           field_address,
2695           /*needs_null_check=*/ false,
2696           /*always_update_field=*/ true,
2697           &temp1,
2698           &temp2);
2699     }
2700 
2701     // Mark the card for the object, as a new value will be stored.
2702     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2703     codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out, new_value_can_be_null);
2704 
2705     if (kPoisonHeapReferences) {
2706       // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2707       // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2708       __ movl(temp1, out);
2709       __ PoisonHeapReference(temp1);
2710       __ xchgl(temp1, field_address);
2711       __ UnpoisonHeapReference(temp1);
2712       __ movl(out, temp1);
2713     } else {
2714       __ xchgl(out, field_address);
2715     }
2716   }
2717 }
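
// Conceptually, the code above implements (sketch in std::atomic terms):
//
//   T GetAndAdd(std::atomic<T>& field, T addend)    { return field.fetch_add(addend); }
//   T GetAndSet(std::atomic<T>& field, T new_value) { return field.exchange(new_value); }
//
// For references, the exchange is preceded (when read barriers are enabled) by a Baker read
// barrier so the field holds a to-space reference before the XCHG, the GC card is marked for
// the stored value, and the value is poisoned/unpoisoned around the XCHG under heap poisoning.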
2718 
VisitUnsafeGetAndAddInt(HInvoke * invoke)2719 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2720   VisitJdkUnsafeGetAndAddInt(invoke);
2721 }
2722 
VisitUnsafeGetAndAddLong(HInvoke * invoke)2723 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2724   VisitJdkUnsafeGetAndAddLong(invoke);
2725 }
2726 
VisitUnsafeGetAndSetInt(HInvoke * invoke)2727 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2728   VisitJdkUnsafeGetAndSetInt(invoke);
2729 }
2730 
VisitUnsafeGetAndSetLong(HInvoke * invoke)2731 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2732   VisitJdkUnsafeGetAndSetLong(invoke);
2733 }
2734 
VisitUnsafeGetAndSetObject(HInvoke * invoke)2735 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2736   VisitJdkUnsafeGetAndSetReference(invoke);
2737 }
2738 
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2739 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2740   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2741 }
2742 
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2743 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2744   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2745 }
2746 
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2747 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2748   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2749 }
2750 
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2751 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2752   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2753 }
2754 
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2755 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2756   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2757 }
2758 
VisitIntegerReverse(HInvoke * invoke)2759 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2760   LocationSummary* locations =
2761       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2762   locations->SetInAt(0, Location::RequiresRegister());
2763   locations->SetOut(Location::SameAsFirstInput());
2764   locations->AddTemp(Location::RequiresRegister());
2765 }
2766 
SwapBits(CpuRegister reg,CpuRegister temp,int32_t shift,int32_t mask,X86_64Assembler * assembler)2767 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2768                      X86_64Assembler* assembler) {
2769   Immediate imm_shift(shift);
2770   Immediate imm_mask(mask);
2771   __ movl(temp, reg);
2772   __ shrl(reg, imm_shift);
2773   __ andl(temp, imm_mask);
2774   __ andl(reg, imm_mask);
2775   __ shll(temp, imm_shift);
2776   __ orl(reg, temp);
2777 }
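
// Each SwapBits call performs one round of the classic bit-reversal recurrence; written as a
// C expression (sketch), the sequence above computes:
//
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
//
// e.g. with shift == 1 and mask == 0x55555555 every pair of adjacent bits is swapped.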
2778 
VisitIntegerReverse(HInvoke * invoke)2779 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2780   X86_64Assembler* assembler = GetAssembler();
2781   LocationSummary* locations = invoke->GetLocations();
2782 
2783   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2784   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2785 
2786   /*
2787    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2788    * swapping bits to reverse the bits of a number x. Using bswap saves instructions
2789    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2790    * x = bswap x
2791    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2792    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2793    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2794    */
2795   __ bswapl(reg);
2796   SwapBits(reg, temp, 1, 0x55555555, assembler);
2797   SwapBits(reg, temp, 2, 0x33333333, assembler);
2798   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2799 }
2800 
VisitLongReverse(HInvoke * invoke)2801 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2802   LocationSummary* locations =
2803       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2804   locations->SetInAt(0, Location::RequiresRegister());
2805   locations->SetOut(Location::SameAsFirstInput());
2806   locations->AddTemp(Location::RequiresRegister());
2807   locations->AddTemp(Location::RequiresRegister());
2808 }
2809 
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2810 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2811                        int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2812   Immediate imm_shift(shift);
2813   __ movq(temp_mask, Immediate(mask));
2814   __ movq(temp, reg);
2815   __ shrq(reg, imm_shift);
2816   __ andq(temp, temp_mask);
2817   __ andq(reg, temp_mask);
2818   __ shlq(temp, imm_shift);
2819   __ orq(reg, temp);
2820 }
2821 
VisitLongReverse(HInvoke * invoke)2822 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2823   X86_64Assembler* assembler = GetAssembler();
2824   LocationSummary* locations = invoke->GetLocations();
2825 
2826   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2827   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2828   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2829 
2830   /*
2831    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2832    * swapping bits to reverse the bits of a long number x. Using bswap saves instructions
2833    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2834    * x = bswap x
2835    * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2836    * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2837    * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2838    */
2839   __ bswapq(reg);
2840   SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2841   SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2842   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2843 }
2844 
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86_64 * codegen,HInvoke * invoke)2845 static void CreateBitCountLocations(
2846     ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2847   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2848     // Do nothing if there is no popcnt support. This results in generating
2849     // a call for the intrinsic rather than direct code.
2850     return;
2851   }
2852   LocationSummary* locations =
2853       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2854   locations->SetInAt(0, Location::Any());
2855   locations->SetOut(Location::RequiresRegister());
2856 }
2857 
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2858 static void GenBitCount(X86_64Assembler* assembler,
2859                         CodeGeneratorX86_64* codegen,
2860                         HInvoke* invoke,
2861                         bool is_long) {
2862   LocationSummary* locations = invoke->GetLocations();
2863   Location src = locations->InAt(0);
2864   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2865 
2866   if (invoke->InputAt(0)->IsConstant()) {
2867     // Evaluate this at compile time.
2868     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2869     int32_t result = is_long
2870         ? POPCOUNT(static_cast<uint64_t>(value))
2871         : POPCOUNT(static_cast<uint32_t>(value));
2872     codegen->Load32BitValue(out, result);
2873     return;
2874   }
2875 
2876   if (src.IsRegister()) {
2877     if (is_long) {
2878       __ popcntq(out, src.AsRegister<CpuRegister>());
2879     } else {
2880       __ popcntl(out, src.AsRegister<CpuRegister>());
2881     }
2882   } else if (is_long) {
2883     DCHECK(src.IsDoubleStackSlot());
2884     __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2885   } else {
2886     DCHECK(src.IsStackSlot());
2887     __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2888   }
2889 }
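
// The constant-folding branch above mirrors what POPCNT computes at run time, e.g.
// Integer.bitCount(0b1011) == 3. A rough sketch of the non-constant path:
//
//   popcntl out, src     // or popcntq for Long.bitCount; a memory operand if the input is
//                        // spilled to the stack
//
// If the CPU lacks POPCNT, CreateBitCountLocations() skips the intrinsic entirely and the
// invoke is compiled as a regular call.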
2890 
VisitIntegerBitCount(HInvoke * invoke)2891 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2892   CreateBitCountLocations(allocator_, codegen_, invoke);
2893 }
2894 
VisitIntegerBitCount(HInvoke * invoke)2895 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2896   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2897 }
2898 
VisitLongBitCount(HInvoke * invoke)2899 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2900   CreateBitCountLocations(allocator_, codegen_, invoke);
2901 }
2902 
VisitLongBitCount(HInvoke * invoke)2903 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2904   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2905 }
2906 
CreateOneBitLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_high)2907 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
2908   LocationSummary* locations =
2909       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2910   locations->SetInAt(0, Location::Any());
2911   locations->SetOut(Location::RequiresRegister());
2912   locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
2913                              : Location::RequiresRegister());  // any will do
2914 }
2915 
GenOneBit(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_high,bool is_long)2916 static void GenOneBit(X86_64Assembler* assembler,
2917                       CodeGeneratorX86_64* codegen,
2918                       HInvoke* invoke,
2919                       bool is_high, bool is_long) {
2920   LocationSummary* locations = invoke->GetLocations();
2921   Location src = locations->InAt(0);
2922   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2923 
2924   if (invoke->InputAt(0)->IsConstant()) {
2925     // Evaluate this at compile time.
2926     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2927     if (value == 0) {
2928       __ xorl(out, out);  // Clears upper bits too.
2929       return;
2930     }
2931     // Nonzero value.
2932     if (is_high) {
2933       value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2934                       : 31 - CLZ(static_cast<uint32_t>(value));
2935     } else {
2936       value = is_long ? CTZ(static_cast<uint64_t>(value))
2937                       : CTZ(static_cast<uint32_t>(value));
2938     }
2939     if (is_long) {
2940       codegen->Load64BitValue(out, 1ULL << value);
2941     } else {
2942       codegen->Load32BitValue(out, 1 << value);
2943     }
2944     return;
2945   }
2946 
2947   // Handle the non-constant cases.
2948   if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
2949       src.IsRegister()) {
2950     __ blsi(out, src.AsRegister<CpuRegister>());
2951   } else {
2952     CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2953     if (is_high) {
2954       // Use architectural support: basically 1 << bsr.
2955       if (src.IsRegister()) {
2956         if (is_long) {
2957           __ bsrq(tmp, src.AsRegister<CpuRegister>());
2958         } else {
2959           __ bsrl(tmp, src.AsRegister<CpuRegister>());
2960         }
2961       } else if (is_long) {
2962         DCHECK(src.IsDoubleStackSlot());
2963         __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2964       } else {
2965         DCHECK(src.IsStackSlot());
2966         __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2967       }
2968       // BSR sets ZF if the input was zero.
2969       NearLabel is_zero, done;
2970       __ j(kEqual, &is_zero);
2971       __ movl(out, Immediate(1));  // Clears upper bits too.
2972       if (is_long) {
2973         __ shlq(out, tmp);
2974       } else {
2975         __ shll(out, tmp);
2976       }
2977       __ jmp(&done);
2978       __ Bind(&is_zero);
2979       __ xorl(out, out);  // Clears upper bits too.
2980       __ Bind(&done);
2981     } else {
2982       // Copy input into temporary.
2983       if (src.IsRegister()) {
2984         if (is_long) {
2985           __ movq(tmp, src.AsRegister<CpuRegister>());
2986         } else {
2987           __ movl(tmp, src.AsRegister<CpuRegister>());
2988         }
2989       } else if (is_long) {
2990         DCHECK(src.IsDoubleStackSlot());
2991         __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2992       } else {
2993         DCHECK(src.IsStackSlot());
2994         __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2995       }
2996       // Do the bit twiddling: basically tmp & -tmp;
2997       if (is_long) {
2998         __ movq(out, tmp);
2999         __ negq(tmp);
3000         __ andq(out, tmp);
3001       } else {
3002         __ movl(out, tmp);
3003         __ negl(tmp);
3004         __ andl(out, tmp);
3005       }
3006     }
3007   }
3008 }
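
// The two bit tricks used above, written out as C sketches:
//
//   lowestOneBit(x)  == x & -x;                       // two's complement isolates the lowest set bit
//   highestOneBit(x) == (x == 0) ? 0 : 1 << bsr(x);   // bsr = index of the highest set bit
//
// e.g. for x == 0b101100: lowestOneBit == 0b000100 and highestOneBit == 0b100000.
// BLSI, used on the lowest-one-bit fast path, computes x & -x in a single instruction.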
3009 
VisitIntegerHighestOneBit(HInvoke * invoke)3010 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3011   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3012 }
3013 
VisitIntegerHighestOneBit(HInvoke * invoke)3014 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3015   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
3016 }
3017 
VisitLongHighestOneBit(HInvoke * invoke)3018 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3019   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3020 }
3021 
VisitLongHighestOneBit(HInvoke * invoke)3022 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3023   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
3024 }
3025 
VisitIntegerLowestOneBit(HInvoke * invoke)3026 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3027   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3028 }
3029 
VisitIntegerLowestOneBit(HInvoke * invoke)3030 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3031   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
3032 }
3033 
VisitLongLowestOneBit(HInvoke * invoke)3034 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3035   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3036 }
3037 
VisitLongLowestOneBit(HInvoke * invoke)3038 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3039   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
3040 }
3041 
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3042 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3043   LocationSummary* locations =
3044       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3045   locations->SetInAt(0, Location::Any());
3046   locations->SetOut(Location::RequiresRegister());
3047 }
3048 
GenLeadingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3049 static void GenLeadingZeros(X86_64Assembler* assembler,
3050                             CodeGeneratorX86_64* codegen,
3051                             HInvoke* invoke, bool is_long) {
3052   LocationSummary* locations = invoke->GetLocations();
3053   Location src = locations->InAt(0);
3054   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3055 
3056   int zero_value_result = is_long ? 64 : 32;
3057   if (invoke->InputAt(0)->IsConstant()) {
3058     // Evaluate this at compile time.
3059     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3060     if (value == 0) {
3061       value = zero_value_result;
3062     } else {
3063       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3064     }
3065     codegen->Load32BitValue(out, value);
3066     return;
3067   }
3068 
3069   // Handle the non-constant cases.
3070   if (src.IsRegister()) {
3071     if (is_long) {
3072       __ bsrq(out, src.AsRegister<CpuRegister>());
3073     } else {
3074       __ bsrl(out, src.AsRegister<CpuRegister>());
3075     }
3076   } else if (is_long) {
3077     DCHECK(src.IsDoubleStackSlot());
3078     __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3079   } else {
3080     DCHECK(src.IsStackSlot());
3081     __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3082   }
3083 
3084   // BSR sets ZF if the input was zero, and the output is undefined.
3085   NearLabel is_zero, done;
3086   __ j(kEqual, &is_zero);
3087 
3088   // Correct the result from BSR to get the CLZ result.
3089   __ xorl(out, Immediate(zero_value_result - 1));
3090   __ jmp(&done);
3091 
3092   // Fix the zero case with the expected result.
3093   __ Bind(&is_zero);
3094   __ movl(out, Immediate(zero_value_result));
3095 
3096   __ Bind(&done);
3097 }
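
// Why the XOR above is enough: for a nonzero input, CLZ(x) == (width - 1) - bsr(x), and since
// bsr(x) is always in [0, width - 1], subtracting it from the all-ones value width - 1 equals
// XORing with width - 1 (no borrows can occur). Worked example for 32 bits:
//
//   x = 0x00F0  ->  bsr(x) = 7  ->  7 ^ 31 = 24 == Integer.numberOfLeadingZeros(0x00F0)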
3098 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3099 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3100   CreateLeadingZeroLocations(allocator_, invoke);
3101 }
3102 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3103 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3104   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3105 }
3106 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3107 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3108   CreateLeadingZeroLocations(allocator_, invoke);
3109 }
3110 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3111 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3112   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3113 }
3114 
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3115 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3116   LocationSummary* locations =
3117       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3118   locations->SetInAt(0, Location::Any());
3119   locations->SetOut(Location::RequiresRegister());
3120 }
3121 
GenTrailingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3122 static void GenTrailingZeros(X86_64Assembler* assembler,
3123                              CodeGeneratorX86_64* codegen,
3124                              HInvoke* invoke, bool is_long) {
3125   LocationSummary* locations = invoke->GetLocations();
3126   Location src = locations->InAt(0);
3127   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3128 
3129   int zero_value_result = is_long ? 64 : 32;
3130   if (invoke->InputAt(0)->IsConstant()) {
3131     // Evaluate this at compile time.
3132     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3133     if (value == 0) {
3134       value = zero_value_result;
3135     } else {
3136       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3137     }
3138     codegen->Load32BitValue(out, value);
3139     return;
3140   }
3141 
3142   // Handle the non-constant cases.
3143   if (src.IsRegister()) {
3144     if (is_long) {
3145       __ bsfq(out, src.AsRegister<CpuRegister>());
3146     } else {
3147       __ bsfl(out, src.AsRegister<CpuRegister>());
3148     }
3149   } else if (is_long) {
3150     DCHECK(src.IsDoubleStackSlot());
3151     __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3152   } else {
3153     DCHECK(src.IsStackSlot());
3154     __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3155   }
3156 
3157   // BSF sets ZF if the input was zero, and the output is undefined.
3158   NearLabel done;
3159   __ j(kNotEqual, &done);
3160 
3161   // Fix the zero case with the expected result.
3162   __ movl(out, Immediate(zero_value_result));
3163 
3164   __ Bind(&done);
3165 }
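
// For a nonzero input, BSF directly yields the number of trailing zeros (the index of the
// lowest set bit), so no post-processing is needed; only the zero case must be patched to
// 32 or 64. E.g. x = 0b101000 -> bsf(x) = 3 == Integer.numberOfTrailingZeros(x).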
3166 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3167 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3168   CreateTrailingZeroLocations(allocator_, invoke);
3169 }
3170 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3171 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3172   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3173 }
3174 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3175 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3176   CreateTrailingZeroLocations(allocator_, invoke);
3177 }
3178 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3179 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3180   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3181 }
3182 
3183 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3184   void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) {          \
3185     InvokeRuntimeCallingConvention calling_convention;                                   \
3186     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3187         invoke,                                                                          \
3188         codegen_,                                                                        \
3189         low,                                                                             \
3190         (high) - (low) + 1,                                                              \
3191         Location::RegisterLocation(RAX),                                                 \
3192         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
3193   }                                                                                      \
3194   void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) {             \
3195     IntrinsicVisitor::ValueOfInfo info =                                                 \
3196         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3197                                              codegen_->GetCompilerOptions(),             \
3198                                              WellKnownClasses::java_lang_##name##_value, \
3199                                              low,                                        \
3200                                              (high) - (low) + 1,                         \
3201                                              start_index);                               \
3202     HandleValueOf(invoke, info, type);                                                   \
3203   }
BOXED_TYPES(VISIT_INTRINSIC)3204   BOXED_TYPES(VISIT_INTRINSIC)
3205 #undef VISIT_INTRINSIC
3206 
3207 template <typename T>
3208 static void Store(X86_64Assembler* assembler,
3209                   DataType::Type primitive_type,
3210                   const Address& address,
3211                   const T& operand) {
3212   switch (primitive_type) {
3213     case DataType::Type::kInt8:
3214     case DataType::Type::kUint8: {
3215       __ movb(address, operand);
3216       break;
3217     }
3218     case DataType::Type::kInt16:
3219     case DataType::Type::kUint16: {
3220       __ movw(address, operand);
3221       break;
3222     }
3223     case DataType::Type::kInt32: {
3224       __ movl(address, operand);
3225       break;
3226     }
3227     default: {
3228       LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3229     }
3230   }
3231 }
3232 
HandleValueOf(HInvoke * invoke,const IntrinsicVisitor::ValueOfInfo & info,DataType::Type type)3233 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3234                                                  const IntrinsicVisitor::ValueOfInfo& info,
3235                                                  DataType::Type type) {
3236   LocationSummary* locations = invoke->GetLocations();
3237   X86_64Assembler* assembler = GetAssembler();
3238 
3239   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3240   InvokeRuntimeCallingConvention calling_convention;
3241   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3242   auto allocate_instance = [&]() {
3243     codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3244     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3245     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3246   };
3247   if (invoke->InputAt(0)->IsIntConstant()) {
3248     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3249     if (static_cast<uint32_t>(value - info.low) < info.length) {
3250       // Just embed the object in the code.
3251       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3252       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3253     } else {
3254       DCHECK(locations->CanCall());
3255       // Allocate and initialize a new object.
3256       // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3257       // JIT object table.
3258       allocate_instance();
3259       Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3260     }
3261   } else {
3262     DCHECK(locations->CanCall());
3263     CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3264     // Check bounds of our cache.
3265     __ leal(out, Address(in, -info.low));
3266     __ cmpl(out, Immediate(info.length));
3267     NearLabel allocate, done;
3268     __ j(kAboveEqual, &allocate);
3269     // If the value is within the bounds, load the boxed value directly from the array.
3270     DCHECK_NE(out.AsRegister(), argument.AsRegister());
3271     codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3272     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3273                   "Check heap reference size.");
3274     __ movl(out, Address(argument, out, TIMES_4, 0));
3275     __ MaybeUnpoisonHeapReference(out);
3276     __ jmp(&done);
3277     __ Bind(&allocate);
3278     // Otherwise allocate and initialize a new object.
3279     allocate_instance();
3280     Store(assembler, type, Address(out, info.value_offset), in);
3281     __ Bind(&done);
3282   }
3283 }
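
// The logic above is a compiled form of the usual boxed-value cache, roughly (sketch):
//
//   if (static_cast<uint32_t>(value - info.low) < info.length) {
//     return boot_image_cache[value - info.low];   // preallocated boxed object
//   } else {
//     obj = AllocObjectInitialized(BoxedClass);
//     obj.value = value;                           // Store() above, sized by `type`
//     return obj;
//   }
//
// The unsigned trick `(value - low) < length` covers both bounds with a single comparison,
// which is exactly what the LEAL/CMPL/j(kAboveEqual) sequence does for non-constant inputs.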
3284 
VisitReferenceGetReferent(HInvoke * invoke)3285 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3286   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3287 }
3288 
VisitReferenceGetReferent(HInvoke * invoke)3289 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3290   X86_64Assembler* assembler = GetAssembler();
3291   LocationSummary* locations = invoke->GetLocations();
3292 
3293   Location obj = locations->InAt(0);
3294   Location out = locations->Out();
3295 
3296   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3297   codegen_->AddSlowPath(slow_path);
3298 
3299   if (codegen_->EmitReadBarrier()) {
3300     // Check self->GetWeakRefAccessEnabled().
3301     ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3302     __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3303                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3304     __ j(kNotEqual, slow_path->GetEntryLabel());
3305   }
3306 
3307   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3308   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3309 
3310   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3311   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3312   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3313   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3314             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3315   __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3316           Immediate(0));
3317   __ j(kNotEqual, slow_path->GetEntryLabel());
3318 
3319   // Load the value from the field.
3320   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3321   if (codegen_->EmitBakerReadBarrier()) {
3322     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3323                                                     out,
3324                                                     obj.AsRegister<CpuRegister>(),
3325                                                     referent_offset,
3326                                                     /*needs_null_check=*/ true);
3327     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3328     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3329   } else {
3330     __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3331     codegen_->MaybeRecordImplicitNullCheck(invoke);
3332     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3333     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3334     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3335   }
3336   __ Bind(slow_path->GetExitLabel());
3337 }
3338 
VisitReferenceRefersTo(HInvoke * invoke)3339 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3340   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3341 }
3342 
VisitReferenceRefersTo(HInvoke * invoke)3343 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3344   X86_64Assembler* assembler = GetAssembler();
3345   LocationSummary* locations = invoke->GetLocations();
3346 
3347   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3348   CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3349   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3350 
3351   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3352   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3353 
3354   __ movl(out, Address(obj, referent_offset));
3355   codegen_->MaybeRecordImplicitNullCheck(invoke);
3356   __ MaybeUnpoisonHeapReference(out);
3357   // Note that the fence is a no-op, thanks to the x86-64 memory model.
3358   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3359 
3360   __ cmpl(out, other);
3361 
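  // When emitting read barriers, `out` was loaded above without one and may still be a from-space
  // reference, while `other` may already point to the to-space copy of the same object. The lock
  // word of a forwarded object holds the forwarding address; the checks below follow it so that
  // such a pair still compares as referring to the same object.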
3362   if (codegen_->EmitReadBarrier()) {
3363     DCHECK(kUseBakerReadBarrier);
3364 
3365     NearLabel calculate_result;
3366     __ j(kEqual, &calculate_result);  // ZF set if taken.
3367 
3368     // Check if the loaded reference is null in a way that leaves ZF clear for null.
3369     __ cmpl(out, Immediate(1));
3370     __ j(kBelow, &calculate_result);  // ZF clear if taken.
3371 
3372     // For correct memory visibility, we need a barrier before loading the lock word, but the
3373     // barrier already emitted for the volatile load above is sufficient.
3374 
3375     // Load the lockword and check if it is a forwarding address.
3376     static_assert(LockWord::kStateShift == 30u);
3377     static_assert(LockWord::kStateForwardingAddress == 3u);
3378     __ movl(out, Address(out, monitor_offset));
3379     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3380     __ j(kBelow, &calculate_result);   // ZF clear if taken.
3381 
3382     // Extract the forwarding address and compare with `other`.
3383     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3384     __ cmpl(out, other);
3385 
3386     __ Bind(&calculate_result);
3387   }
3388 
3389   // Convert ZF into the Boolean result.
3390   __ setcc(kEqual, out);
3391   __ movzxb(out, out);
3392 }
3393 
3394 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3395   LocationSummary* locations =
3396       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3397   locations->SetOut(Location::RequiresRegister());
3398 }
3399 
3400 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3401   X86_64Assembler* assembler = GetAssembler();
3402   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3403   Address address = Address::Absolute
3404       (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
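  // Thread.interrupted() must read and clear the flag. We clear it (and emit a fence) only when
  // it was found set, so the common non-interrupted case is just a single GS-relative load.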
3405   NearLabel done;
3406   __ gs()->movl(out, address);
3407   __ testl(out, out);
3408   __ j(kEqual, &done);
3409   __ gs()->movl(address, Immediate(0));
3410   codegen_->MemoryFence();
3411   __ Bind(&done);
3412 }
3413 
3414 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3415   LocationSummary* locations =
3416       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3417   locations->SetInAt(0, Location::Any());
3418 }
3419 
3420 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3421 
3422 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3423   LocationSummary* locations =
3424       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3425   locations->SetInAt(0, Location::RegisterLocation(RAX));
3426   locations->SetInAt(1, Location::RequiresRegister());
3427   locations->SetOut(Location::SameAsFirstInput());
3428   // The x86-64 DIV instruction takes its dividend in edx:eax (rdx:rax for the 64-bit form).
3429   locations->AddTemp(Location::RegisterLocation(RDX));
3430 }
3431 
3432 static void GenerateDivideUnsigned(HInvoke* invoke,
3433                                    CodeGeneratorX86_64* codegen,
3434                                    DataType::Type data_type) {
3435   LocationSummary* locations = invoke->GetLocations();
3436   Location out = locations->Out();
3437   Location first = locations->InAt(0);
3438   Location second = locations->InAt(1);
3439   CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3440   CpuRegister second_reg = second.AsRegister<CpuRegister>();
3441 
3442   DCHECK_EQ(RAX, first.AsRegister<Register>());
3443   DCHECK_EQ(RAX, out.AsRegister<Register>());
3444   DCHECK_EQ(RDX, rdx.AsRegister());
3445 
3446   // Check whether the divisor is zero and bail out to the slow path if so.
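  // Unlike signed division, unsigned DIV has no overflow case (there is no kMinValue / -1 issue),
  // so a zero divisor is the only input needing special handling; the slow path falls back to the
  // original divideUnsigned call, which throws ArithmeticException for it.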
3447   auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3448   codegen->AddSlowPath(slow_path);
3449 
3450   X86_64Assembler* assembler = codegen->GetAssembler();
3451   if (data_type == DataType::Type::kInt32) {
3452     __ testl(second_reg, second_reg);
3453     __ j(kEqual, slow_path->GetEntryLabel());
3454     __ xorl(rdx, rdx);
3455     __ divl(second_reg);
3456   } else {
3457     DCHECK(data_type == DataType::Type::kInt64);
3458     __ testq(second_reg, second_reg);
3459     __ j(kEqual, slow_path->GetEntryLabel());
3460     __ xorq(rdx, rdx);
3461     __ divq(second_reg);
3462   }
3463   __ Bind(slow_path->GetExitLabel());
3464 }
3465 
3466 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3467   CreateDivideUnsignedLocations(invoke, allocator_);
3468 }
3469 
3470 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3471   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3472 }
3473 
3474 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3475   CreateDivideUnsignedLocations(invoke, allocator_);
3476 }
3477 
3478 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3479   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3480 }
3481 
3482 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3483   LocationSummary* locations =
3484       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3485   locations->SetInAt(0, Location::RegisterLocation(RAX));
3486   locations->SetInAt(1, Location::RequiresRegister());
3487   locations->SetOut(Location::RegisterLocation(RDX));
3488   locations->AddTemp(Location::RegisterLocation(RAX));
3489 }
3490 
3491 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3492   X86_64Assembler* assembler = GetAssembler();
3493   LocationSummary* locations = invoke->GetLocations();
3494 
3495   CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3496 
3497   DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3498   DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3499 
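  // The one-operand IMUL multiplies RAX by the operand and leaves the 128-bit product in RDX:RAX;
  // the high 64 bits in RDX are exactly the result of Math.multiplyHigh, hence RDX as the output.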
3500   __ imulq(y);
3501 }
3502 
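// Slow path shared by the VarHandle intrinsics below. Besides the generic fallback call, it
// records the access parameters (volatility, atomicity, required barriers, get-and-update op)
// and two labels so that EmitByteArrayViewCode (defined later in this file) can redo the access
// for byte array views with the requested byte order before falling back to the generic path.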
3503 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3504  public:
3505   explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3506       : IntrinsicSlowPathX86_64(invoke) {
3507   }
3508 
3509   void SetVolatile(bool is_volatile) {
3510     is_volatile_ = is_volatile;
3511   }
3512 
3513   void SetAtomic(bool is_atomic) {
3514     is_atomic_ = is_atomic;
3515   }
3516 
3517   void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3518     need_any_store_barrier_ = need_any_store_barrier;
3519   }
3520 
3521   void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3522     need_any_any_barrier_ = need_any_any_barrier;
3523   }
3524 
3525   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3526     get_and_update_op_ = get_and_update_op;
3527   }
3528 
3529   Label* GetByteArrayViewCheckLabel() {
3530     return &byte_array_view_check_label_;
3531   }
3532 
3533   Label* GetNativeByteOrderLabel() {
3534     return &native_byte_order_label_;
3535   }
3536 
3537   void EmitNativeCode(CodeGenerator* codegen) override {
3538     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3539       EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3540     }
3541     IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3542   }
3543 
3544  private:
3545   HInvoke* GetInvoke() const {
3546     return GetInstruction()->AsInvoke();
3547   }
3548 
3549   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3550     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3551   }
3552 
3553   void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3554 
3555   Label byte_array_view_check_label_;
3556   Label native_byte_order_label_;
3557 
3558   // Arguments forwarded to specific methods.
3559   bool is_volatile_;
3560   bool is_atomic_;
3561   bool need_any_store_barrier_;
3562   bool need_any_any_barrier_;
3563   GetAndUpdateOp get_and_update_op_;
3564 };
3565 
3566 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3567   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3568   X86_64Assembler* assembler = codegen->GetAssembler();
3569   LocationSummary* locations = invoke->GetLocations();
3570   DCHECK(locations->InAt(0).Equals(locations->Out()));
3571   XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3572   XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3573   XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
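  // A fused multiply-add computes left * right + accumulator with a single rounding step,
  // which is exactly the contract of Math.fma.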
3574   if (invoke->GetType() == DataType::Type::kFloat32) {
3575     __ vfmadd213ss(left, right, accumulator);
3576   } else {
3577     DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3578     __ vfmadd213sd(left, right, accumulator);
3579   }
3580 }
3581 
3582 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3583   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3584   GenerateMathFma(invoke, codegen_);
3585 }
3586 
3587 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3588   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3589     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3590   }
3591 }
3592 
3593 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3594   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3595   GenerateMathFma(invoke, codegen_);
3596 }
3597 
3598 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3599   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3600     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3601   }
3602 }
3603 
3604 // Generate subtype check without read barriers.
3605 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3606                                                     VarHandleSlowPathX86_64* slow_path,
3607                                                     CpuRegister object,
3608                                                     CpuRegister temp,
3609                                                     Address type_address,
3610                                                     bool object_can_be_null = true) {
3611   X86_64Assembler* assembler = codegen->GetAssembler();
3612 
3613   const MemberOffset class_offset = mirror::Object::ClassOffset();
3614   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3615 
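  // The code below walks the superclass chain of `object`, comparing each class against the
  // (possibly poisoned) expected type at `type_address`. Roughly, as an informal sketch:
  //
  //   klass = object->klass;
  //   while (klass != *type_address) {
  //     klass = klass->super_class;
  //     if (klass == null) goto slow_path;  // Possibly a false negative; the runtime re-checks.
  //   }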
3616   NearLabel check_type_compatibility, type_matched;
3617 
3618   // If the object is null, there is no need to check the type.
3619   if (object_can_be_null) {
3620     __ testl(object, object);
3621     __ j(kZero, &type_matched);
3622   }
3623 
3624   // Do not unpoison for in-memory comparison.
3625   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3626   __ movl(temp, Address(object, class_offset));
3627   __ Bind(&check_type_compatibility);
3628   __ cmpl(temp, type_address);
3629   __ j(kEqual, &type_matched);
3630   // Load the super class.
3631   __ MaybeUnpoisonHeapReference(temp);
3632   __ movl(temp, Address(temp, super_class_offset));
3633   // If the super class is null, we reached the root of the hierarchy without a match.
3634   // We let the slow path handle uncovered cases (e.g. interfaces).
3635   __ testl(temp, temp);
3636   __ j(kEqual, slow_path->GetEntryLabel());
3637   __ jmp(&check_type_compatibility);
3638   __ Bind(&type_matched);
3639 }
3640 
3641 // Check access mode and the primitive type from VarHandle.varType.
3642 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3643 // check without read barrier, so it can have false negatives which we handle in the slow path.
3644 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3645                                                         CodeGeneratorX86_64* codegen,
3646                                                         VarHandleSlowPathX86_64* slow_path,
3647                                                         DataType::Type type) {
3648   X86_64Assembler* assembler = codegen->GetAssembler();
3649 
3650   LocationSummary* locations = invoke->GetLocations();
3651   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3652   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3653 
3654   mirror::VarHandle::AccessMode access_mode =
3655       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3656   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3657 
3658   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3659   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3660   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3661 
3662   // Check that the operation is permitted.
3663   __ testl(Address(varhandle, access_mode_bit_mask_offset),
3664            Immediate(1u << static_cast<uint32_t>(access_mode)));
3665   __ j(kZero, slow_path->GetEntryLabel());
3666 
3667   // For primitive types, we do not need a read barrier when loading the reference, as we only
3668   // use it to load a constant field through that reference. For reference types, we deliberately
3669   // avoid the read barrier, letting the slow path handle the false negatives.
3670   __ movl(temp, Address(varhandle, var_type_offset));
3671   __ MaybeUnpoisonHeapReference(temp);
3672 
3673   // Check the varType.primitiveType field against the type we're trying to use.
3674   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3675   __ j(kNotEqual, slow_path->GetEntryLabel());
3676 
3677   if (type == DataType::Type::kReference) {
3678     // Check reference arguments against the varType.
3679     // False negatives due to varType being an interface or array type
3680     // or due to the missing read barrier are handled by the slow path.
3681     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3682     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3683     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3684     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3685       HInstruction* arg = invoke->InputAt(arg_index);
3686       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3687       if (!arg->IsNullConstant()) {
3688         CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3689         Address type_addr(varhandle, var_type_offset);
3690         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3691       }
3692     }
3693   }
3694 }
3695 
3696 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3697                                               CodeGeneratorX86_64* codegen,
3698                                               VarHandleSlowPathX86_64* slow_path) {
3699   X86_64Assembler* assembler = codegen->GetAssembler();
3700 
3701   LocationSummary* locations = invoke->GetLocations();
3702   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3703 
3704   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3705 
3706   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3707   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3708   __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3709   __ j(kNotEqual, slow_path->GetEntryLabel());
3710 }
3711 
3712 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3713                                                  CodeGeneratorX86_64* codegen,
3714                                                  VarHandleSlowPathX86_64* slow_path) {
3715   VarHandleOptimizations optimizations(invoke);
3716   X86_64Assembler* assembler = codegen->GetAssembler();
3717 
3718   LocationSummary* locations = invoke->GetLocations();
3719   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3720   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3721   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3722 
3723   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3724   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3725 
3726   // Null-check the object.
3727   if (!optimizations.GetSkipObjectNullCheck()) {
3728     __ testl(object, object);
3729     __ j(kZero, slow_path->GetEntryLabel());
3730   }
3731 
3732   if (!optimizations.GetUseKnownImageVarHandle()) {
3733     // Check that the VarHandle references an instance field by checking that
3734     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3735     // type compatibility check with the source object's type, which will fail for null.
3736     __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3737     __ j(kNotEqual, slow_path->GetEntryLabel());
3738 
3739     // Check that the object has the correct type.
3740     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3741     GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3742                                             slow_path,
3743                                             object,
3744                                             temp,
3745                                             Address(varhandle, coordinate_type0_offset),
3746                                             /*object_can_be_null=*/ false);
3747   }
3748 }
3749 
3750 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3751                                          CodeGeneratorX86_64* codegen,
3752                                          VarHandleSlowPathX86_64* slow_path) {
3753   VarHandleOptimizations optimizations(invoke);
3754   X86_64Assembler* assembler = codegen->GetAssembler();
3755   LocationSummary* locations = invoke->GetLocations();
3756 
3757   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3758   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3759   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3760   DataType::Type value_type =
3761       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3762   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3763 
3764   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3765   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3766   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3767   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3768   const MemberOffset class_offset = mirror::Object::ClassOffset();
3769   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3770 
3771   // Null-check the object.
3772   if (!optimizations.GetSkipObjectNullCheck()) {
3773     __ testl(object, object);
3774     __ j(kZero, slow_path->GetEntryLabel());
3775   }
3776 
3777   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3778 
3779   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3780   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3781   // coordinateType0 shall not be null but we do not explicitly verify that.
3782   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3783   __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3784   __ j(kEqual, slow_path->GetEntryLabel());
3785 
3786   // Check object class against componentType0.
3787   //
3788   // This is an exact check and we defer other cases to the runtime. This includes
3789   // conversion to array of superclass references, which is valid but subsequently
3790   // requires all update operations to check that the value can indeed be stored.
3791   // We do not want to perform such extra checks in the intrinsified code.
3792   //
3793   // We do this check without read barrier, so there can be false negatives which we
3794   // defer to the slow path. There shall be no false negatives for array classes in the
3795   // boot image (including Object[] and primitive arrays) because they are non-movable.
3796   __ movl(temp, Address(object, class_offset.Int32Value()));
3797   __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3798   __ j(kNotEqual, slow_path->GetEntryLabel());
3799 
3800   // Check that the coordinateType0 is an array type. We do not need a read barrier
3801   // for loading constant reference fields (or chains of them) for comparison with null,
3802   // nor for finally loading a constant primitive field (primitive type) below.
3803   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3804   __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3805   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3806   __ testl(temp, temp);
3807   __ j(kZero, slow_path->GetEntryLabel());
3808 
3809   // Check that the array component type matches the primitive type.
3810   Label* slow_path_label;
3811   if (primitive_type == Primitive::kPrimNot) {
3812     slow_path_label = slow_path->GetEntryLabel();
3813   } else {
3814     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3815     // we shall check for a byte array view in the slow path.
3816     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3817     // so we cannot emit that if we're JITting without boot image.
3818     bool boot_image_available =
3819         codegen->GetCompilerOptions().IsBootImage() ||
3820         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3821     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3822     slow_path_label =
3823         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3824   }
3825   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3826   __ j(kNotEqual, slow_path_label);
3827 
3828   // Check for array index out of bounds.
3829   __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3830   __ j(kAboveEqual, slow_path->GetEntryLabel());
3831 }
3832 
3833 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3834                                               CodeGeneratorX86_64* codegen,
3835                                               VarHandleSlowPathX86_64* slow_path) {
3836   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3837   if (expected_coordinates_count == 0u) {
3838     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3839   } else if (expected_coordinates_count == 1u) {
3840     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3841   } else {
3842     DCHECK_EQ(expected_coordinates_count, 2u);
3843     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3844   }
3845 }
3846 
3847 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
3848                                                         CodeGeneratorX86_64* codegen,
3849                                                         DataType::Type type) {
3850   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3851   VarHandleOptimizations optimizations(invoke);
3852   if (optimizations.GetUseKnownImageVarHandle()) {
3853     DCHECK_NE(expected_coordinates_count, 2u);
3854     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3855       return nullptr;
3856     }
3857   }
3858 
3859   VarHandleSlowPathX86_64* slow_path =
3860       new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
3861   codegen->AddSlowPath(slow_path);
3862 
3863   if (!optimizations.GetUseKnownImageVarHandle()) {
3864     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3865   }
3866   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3867 
3868   return slow_path;
3869 }
3870 
3871 struct VarHandleTarget {
3872   Register object;  // The object holding the value to operate on.
3873   Register offset;  // The offset of the value to operate on.
3874 };
3875 
3876 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3877   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3878   LocationSummary* locations = invoke->GetLocations();
3879 
3880   VarHandleTarget target;
3881   // The temporary allocated for loading the offset.
3882   target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
3883   // The reference to the object that holds the value to operate on.
3884   target.object = (expected_coordinates_count == 0u)
3885       ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
3886       : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
3887   return target;
3888 }
3889 
3890 static void GenerateVarHandleTarget(HInvoke* invoke,
3891                                     const VarHandleTarget& target,
3892                                     CodeGeneratorX86_64* codegen) {
3893   LocationSummary* locations = invoke->GetLocations();
3894   X86_64Assembler* assembler = codegen->GetAssembler();
3895   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3896 
3897   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3898 
3899   if (expected_coordinates_count <= 1u) {
3900     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
3901       ScopedObjectAccess soa(Thread::Current());
3902       ArtField* target_field = GetBootImageVarHandleField(invoke);
3903       if (expected_coordinates_count == 0u) {
3904         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3905         __ movl(CpuRegister(target.object),
3906                 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
3907         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3908           codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
3909         } else {
3910           codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
3911                                             declaring_class->GetDexTypeIndex());
3912         }
3913       }
3914       __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
3915     } else {
3916       // For static fields, we need to fill `target.object` with the declaring class,
3917       // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
3918       // we do not need the declaring class and can discard the `ArtField*` once we have
3919       // loaded the `target.offset`, so we use `target.offset` itself to hold the `ArtField*`.
3920       CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
3921 
3922       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3923       const MemberOffset offset_offset = ArtField::OffsetOffset();
3924 
3925       // Load the ArtField*, the offset and, if needed, declaring class.
3926       __ movq(field, Address(varhandle, art_field_offset));
3927       __ movl(CpuRegister(target.offset), Address(field, offset_offset));
3928       if (expected_coordinates_count == 0u) {
3929         InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
3930         instr_codegen->GenerateGcRootFieldLoad(invoke,
3931                                                Location::RegisterLocation(target.object),
3932                                                Address(field, ArtField::DeclaringClassOffset()),
3933                                                /*fixup_label=*/nullptr,
3934                                                codegen->GetCompilerReadBarrierOption());
3935       }
3936     }
3937   } else {
3938     DCHECK_EQ(expected_coordinates_count, 2u);
3939 
3940     DataType::Type value_type =
3941         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3942     ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
3943     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3944     CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3945 
3946     // The effect of LEA is `target.offset = index * scale + data_offset`.
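    // The `target.object` base is added later: the actual accesses use
    // `Address(target.object, target.offset, TIMES_1, 0)`, i.e. plain object + offset addressing.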
3947     __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
3948   }
3949 }
3950 
3951 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3952   // The only supported read barrier implementation is the Baker-style read barriers.
3953   if (codegen->EmitNonBakerReadBarrier()) {
3954     return false;
3955   }
3956 
3957   VarHandleOptimizations optimizations(invoke);
3958   if (optimizations.GetDoNotIntrinsify()) {
3959     return false;
3960   }
3961 
3962   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3963   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
3964   return true;
3965 }
3966 
3967 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
3968   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3969   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3970   LocationSummary* locations = new (allocator) LocationSummary(
3971       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3972 
3973   locations->SetInAt(0, Location::RequiresRegister());
3974   // Require coordinates in registers. These are the object holding the value
3975   // to operate on (except for static fields) and index (for arrays and views).
3976   for (size_t i = 0; i != expected_coordinates_count; ++i) {
3977     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3978   }
3979 
3980   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3981   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3982   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3983     HInstruction* arg = invoke->InputAt(arg_index);
3984     if (DataType::IsFloatingPointType(arg->GetType())) {
3985       locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
3986     } else {
3987       locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
3988     }
3989   }
3990 
3991   // Add a temporary for offset.
3992   locations->AddTemp(Location::RequiresRegister());
3993 
3994   if (expected_coordinates_count == 0u) {
3995     // Add a temporary to hold the declaring class.
3996     locations->AddTemp(Location::RequiresRegister());
3997   }
3998 
3999   return locations;
4000 }
4001 
4002 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4003   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4004     return;
4005   }
4006 
4007   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4008   if (DataType::IsFloatingPointType(invoke->GetType())) {
4009     locations->SetOut(Location::RequiresFpuRegister());
4010   } else {
4011     locations->SetOut(Location::RequiresRegister());
4012   }
4013 }
4014 
4015 static void GenerateVarHandleGet(HInvoke* invoke,
4016                                  CodeGeneratorX86_64* codegen,
4017                                  bool byte_swap = false) {
4018   DataType::Type type = invoke->GetType();
4019   DCHECK_NE(type, DataType::Type::kVoid);
4020 
4021   LocationSummary* locations = invoke->GetLocations();
4022   X86_64Assembler* assembler = codegen->GetAssembler();
4023 
4024   VarHandleTarget target = GetVarHandleTarget(invoke);
4025   VarHandleSlowPathX86_64* slow_path = nullptr;
4026   if (!byte_swap) {
4027     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4028     GenerateVarHandleTarget(invoke, target, codegen);
4029     if (slow_path != nullptr) {
4030       __ Bind(slow_path->GetNativeByteOrderLabel());
4031     }
4032   }
4033 
4034   // Load the value from the field.
4035   Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4036   Location out = locations->Out();
4037 
4038   if (type == DataType::Type::kReference) {
4039     if (codegen->EmitReadBarrier()) {
4040       DCHECK(kUseBakerReadBarrier);
4041       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4042           invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4043     } else {
4044       __ movl(out.AsRegister<CpuRegister>(), src);
4045       __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4046     }
4047     DCHECK(!byte_swap);
4048   } else {
4049     codegen->LoadFromMemoryNoReference(type, out, src);
4050     if (byte_swap) {
4051       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4052       codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4053     }
4054   }
4055 
4056   if (slow_path != nullptr) {
4057     DCHECK(!byte_swap);
4058     __ Bind(slow_path->GetExitLabel());
4059   }
4060 }
4061 
4062 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4063   CreateVarHandleGetLocations(invoke, codegen_);
4064 }
4065 
4066 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4067   GenerateVarHandleGet(invoke, codegen_);
4068 }
4069 
4070 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4071   CreateVarHandleGetLocations(invoke, codegen_);
4072 }
4073 
4074 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4075   // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
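  // Under the x86-64 (TSO) memory model an ordinary load is not reordered with later loads or
  // stores, so it already provides acquire semantics and no extra fence is needed.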
4076   GenerateVarHandleGet(invoke, codegen_);
4077 }
4078 
4079 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4080   CreateVarHandleGetLocations(invoke, codegen_);
4081 }
4082 
4083 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4084   // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4085   GenerateVarHandleGet(invoke, codegen_);
4086 }
4087 
4088 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4089   CreateVarHandleGetLocations(invoke, codegen_);
4090 }
4091 
4092 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4093   // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4094   GenerateVarHandleGet(invoke, codegen_);
4095 }
4096 
4097 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4098   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4099     return;
4100   }
4101 
4102   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4103 
4104   // Extra temporary is used for the card in MarkGCCard and for moving 64-bit constants to memory.
4105   locations->AddTemp(Location::RequiresRegister());
4106 }
4107 
4108 static void GenerateVarHandleSet(HInvoke* invoke,
4109                                  CodeGeneratorX86_64* codegen,
4110                                  bool is_volatile,
4111                                  bool is_atomic,
4112                                  bool byte_swap = false) {
4113   X86_64Assembler* assembler = codegen->GetAssembler();
4114 
4115   LocationSummary* locations = invoke->GetLocations();
4116   const uint32_t last_temp_index = locations->GetTempCount() - 1;
4117 
4118   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4119   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4120 
4121   VarHandleTarget target = GetVarHandleTarget(invoke);
4122   VarHandleSlowPathX86_64* slow_path = nullptr;
4123   if (!byte_swap) {
4124     slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4125     GenerateVarHandleTarget(invoke, target, codegen);
4126     if (slow_path != nullptr) {
4127       slow_path->SetVolatile(is_volatile);
4128       slow_path->SetAtomic(is_atomic);
4129       __ Bind(slow_path->GetNativeByteOrderLabel());
4130     }
4131   }
4132 
4133   switch (invoke->GetIntrinsic()) {
4134     case Intrinsics::kVarHandleSetRelease:
4135       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4136       break;
4137     case Intrinsics::kVarHandleSetVolatile:
4138       // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4139       break;
4140     default:
4141       // Other intrinsics don't need a barrier.
4142       break;
4143   }
4144 
4145   Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4146 
4147   // Store the value to the field.
4148   codegen->GetInstructionCodegen()->HandleFieldSet(
4149       invoke,
4150       value_index,
4151       last_temp_index,
4152       value_type,
4153       dst,
4154       CpuRegister(target.object),
4155       is_volatile,
4156       is_atomic,
4157       /*value_can_be_null=*/true,
4158       byte_swap,
4159       // Value can be null, and this write barrier is not being relied on for other sets.
4160       value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4161                                                  WriteBarrierKind::kDontEmit);
4162 
4163   // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4164 
4165   if (slow_path != nullptr) {
4166     DCHECK(!byte_swap);
4167     __ Bind(slow_path->GetExitLabel());
4168   }
4169 }
4170 
4171 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4172   CreateVarHandleSetLocations(invoke, codegen_);
4173 }
4174 
4175 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4176   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4177 }
4178 
4179 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4180   CreateVarHandleSetLocations(invoke, codegen_);
4181 }
4182 
4183 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4184   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4185 }
4186 
4187 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4188   CreateVarHandleSetLocations(invoke, codegen_);
4189 }
4190 
4191 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4192   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4193 }
4194 
4195 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4196   CreateVarHandleSetLocations(invoke, codegen_);
4197 }
4198 
4199 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4200   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4201 }
4202 
4203 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4204                                                             CodeGeneratorX86_64* codegen) {
4205   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4206     return;
4207   }
4208 
4209   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4210   uint32_t expected_value_index = number_of_arguments - 2;
4211   uint32_t new_value_index = number_of_arguments - 1;
4212   DataType::Type return_type = invoke->GetType();
4213   DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4214   DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4215 
4216   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4217 
4218   if (DataType::IsFloatingPointType(return_type)) {
4219     locations->SetOut(Location::RequiresFpuRegister());
4220   } else {
4221     // Take advantage of the fact that CMPXCHG writes result to RAX.
4222     locations->SetOut(Location::RegisterLocation(RAX));
4223   }
4224 
4225   if (DataType::IsFloatingPointType(expected_type)) {
4226     // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4227     locations->AddTemp(Location::RegisterLocation(RAX));
4228     // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4229     locations->AddTemp(Location::RequiresRegister());
4230   } else {
4231     // Ensure that expected value is in RAX, as required by CMPXCHG.
4232     locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4233     locations->SetInAt(new_value_index, Location::RequiresRegister());
4234     if (expected_type == DataType::Type::kReference) {
4235       // Need two temporaries for MarkGCCard.
4236       locations->AddTemp(Location::RequiresRegister());
4237       locations->AddTemp(Location::RequiresRegister());
4238       if (codegen->EmitReadBarrier()) {
4239         // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4240         DCHECK(kUseBakerReadBarrier);
4241         locations->AddTemp(Location::RequiresRegister());
4242       }
4243     }
4244     // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4245     DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4246   }
4247 }
4248 
4249 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4250                                                      CodeGeneratorX86_64* codegen,
4251                                                      bool is_cmpxchg,
4252                                                      bool byte_swap = false) {
4253   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4254 
4255   X86_64Assembler* assembler = codegen->GetAssembler();
4256   LocationSummary* locations = invoke->GetLocations();
4257 
4258   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4259   uint32_t expected_value_index = number_of_arguments - 2;
4260   uint32_t new_value_index = number_of_arguments - 1;
4261   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4262 
4263   VarHandleSlowPathX86_64* slow_path = nullptr;
4264   VarHandleTarget target = GetVarHandleTarget(invoke);
4265   if (!byte_swap) {
4266     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4267     GenerateVarHandleTarget(invoke, target, codegen);
4268     if (slow_path != nullptr) {
4269       __ Bind(slow_path->GetNativeByteOrderLabel());
4270     }
4271   }
4272 
4273   uint32_t temp_count = locations->GetTempCount();
4274   GenCompareAndSetOrExchange(codegen,
4275                              invoke,
4276                              type,
4277                              CpuRegister(target.object),
4278                              CpuRegister(target.offset),
4279                              /*temp1_index=*/ temp_count - 1,
4280                              /*temp2_index=*/ temp_count - 2,
4281                              /*temp3_index=*/ temp_count - 3,
4282                              locations->InAt(new_value_index),
4283                              locations->InAt(expected_value_index),
4284                              locations->Out(),
4285                              is_cmpxchg,
4286                              byte_swap);
4287 
4288   // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4289   // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
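  // CMPXCHG also dictates the register constraints set up in the locations builder: it compares
  // RAX with the memory operand, stores the new value on success and loads the current memory
  // value into RAX on failure, so the expected value and the output are both fixed to RAX.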
4290 
4291   if (slow_path != nullptr) {
4292     DCHECK(!byte_swap);
4293     __ Bind(slow_path->GetExitLabel());
4294   }
4295 }
4296 
4297 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4298   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4299 }
4300 
4301 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4302   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4303 }
4304 
4305 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4306   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4307 }
4308 
4309 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4310   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4311 }
4312 
4313 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4314   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4315 }
4316 
4317 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4318   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4319 }
4320 
4321 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4322   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4323 }
4324 
4325 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4326   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4327 }
4328 
4329 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4330   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4331 }
4332 
4333 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4334   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4335 }
4336 
4337 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4338   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4339 }
4340 
4341 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4342   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4343 }
4344 
4345 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4346   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4347 }
4348 
4349 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4350   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4351 }
4352 
4353 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4354   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4355 }
4356 
4357 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4358   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4359 }
4360 
4361 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4362   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4363     return;
4364   }
4365 
4366   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4367   uint32_t new_value_index = number_of_arguments - 1;
4368   DataType::Type type = invoke->GetType();
4369   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4370 
4371   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4372 
4373   if (DataType::IsFloatingPointType(type)) {
4374     locations->SetOut(Location::RequiresFpuRegister());
4375     // A temporary is needed to load the new floating-point value into a register for XCHG.
4376     locations->AddTemp(Location::RequiresRegister());
4377   } else {
4378     // Use the same register for both the new value and output to take advantage of XCHG.
4379     // It doesn't have to be RAX, but we need to pick one register and use it for both.
4380     locations->SetOut(Location::RegisterLocation(RAX));
4381     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4382     if (type == DataType::Type::kReference) {
4383       // Need two temporaries for MarkGCCard.
4384       locations->AddTemp(Location::RequiresRegister());
4385       locations->AddTemp(Location::RequiresRegister());
4386       if (codegen->EmitReadBarrier()) {
4387         // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4388         DCHECK(kUseBakerReadBarrier);
4389         locations->AddTemp(Location::RequiresRegister());
4390       }
4391     }
4392   }
4393 }
4394 
4395 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4396                                        CodeGeneratorX86_64* codegen,
4397                                        Location value,
4398                                        DataType::Type type,
4399                                        Address field_addr,
4400                                        CpuRegister ref,
4401                                        bool byte_swap) {
4402   X86_64Assembler* assembler = codegen->GetAssembler();
4403   LocationSummary* locations = invoke->GetLocations();
4404   Location out = locations->Out();
4405   uint32_t temp_count = locations->GetTempCount();
4406 
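  // All exchanges below use XCHG with a memory operand, which is implicitly locked, so the
  // operation is atomic and fully ordered without an explicit fence.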
4407   if (DataType::IsFloatingPointType(type)) {
4408     // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4409     // exchange it with the field, and move the old value into the output FP register.
4410     Location temp = locations->GetTemp(temp_count - 1);
4411     codegen->Move(temp, value);
4412     bool is64bit = (type == DataType::Type::kFloat64);
4413     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4414     if (byte_swap) {
4415       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4416     }
4417     if (is64bit) {
4418       __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4419     } else {
4420       __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4421     }
4422     if (byte_swap) {
4423       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4424     }
4425     __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4426   } else if (type == DataType::Type::kReference) {
4427     // `getAndSet` for references: load reference and atomically exchange it with the field.
4428     // Output register is the same as the one holding new value, so no need to move the result.
4429     DCHECK(!byte_swap);
4430 
4431     CpuRegister temp1 = locations->GetTemp(temp_count - 1).AsRegister<CpuRegister>();
4432     CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
4433     CpuRegister valreg = value.AsRegister<CpuRegister>();
4434 
4435     if (codegen->EmitBakerReadBarrier()) {
4436       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4437           invoke,
4438           locations->GetTemp(temp_count - 3),
4439           ref,
4440           field_addr,
4441           /*needs_null_check=*/ false,
4442           /*always_update_field=*/ true,
4443           &temp1,
4444           &temp2);
4445     }
4446     codegen->MarkGCCard(temp1, temp2, ref);
4447 
4448     DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4449     if (kPoisonHeapReferences) {
4450       // Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is
4451       // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4452       __ movl(temp1, valreg);
4453       __ PoisonHeapReference(temp1);
4454       __ xchgl(temp1, field_addr);
4455       __ UnpoisonHeapReference(temp1);
4456       __ movl(valreg, temp1);
4457     } else {
4458       __ xchgl(valreg, field_addr);
4459     }
4460   } else {
4461     // `getAndSet` for integral types: atomically exchange the new value with the field. The
4462     // output register is the same as the one holding the new value; sign-/zero-extend as needed.
4463     if (byte_swap) {
4464       codegen->GetInstructionCodegen()->Bswap(value, type);
4465     }
4466     CpuRegister valreg = value.AsRegister<CpuRegister>();
4467     DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4468     switch (type) {
4469       case DataType::Type::kBool:
4470       case DataType::Type::kUint8:
4471         __ xchgb(valreg, field_addr);
4472         __ movzxb(valreg, valreg);
4473         break;
4474       case DataType::Type::kInt8:
4475         __ xchgb(valreg, field_addr);
4476         __ movsxb(valreg, valreg);
4477         break;
4478       case DataType::Type::kUint16:
4479         __ xchgw(valreg, field_addr);
4480         __ movzxw(valreg, valreg);
4481         break;
4482       case DataType::Type::kInt16:
4483         __ xchgw(valreg, field_addr);
4484         __ movsxw(valreg, valreg);
4485         break;
4486       case DataType::Type::kInt32:
4487       case DataType::Type::kUint32:
4488         __ xchgl(valreg, field_addr);
4489         break;
4490       case DataType::Type::kInt64:
4491       case DataType::Type::kUint64:
4492         __ xchgq(valreg, field_addr);
4493         break;
4494       default:
4495         LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4496         UNREACHABLE();
4497     }
4498     if (byte_swap) {
4499       codegen->GetInstructionCodegen()->Bswap(value, type);
4500     }
4501   }
4502 }
4503 
4504 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4505   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4506     return;
4507   }
4508 
4509   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4510   uint32_t new_value_index = number_of_arguments - 1;
4511   DataType::Type type = invoke->GetType();
4512   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4513 
4514   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4515 
4516   DCHECK_NE(DataType::Type::kReference, type);
4517   DCHECK(!DataType::IsFloatingPointType(type));
4518 
4519   // A temporary to compute the bitwise operation on the old and the new values.
4520   locations->AddTemp(Location::RequiresRegister());
4521   // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
4522   // instructions that accept 64-bit immediate on x86_64).
4523   locations->SetInAt(new_value_index, DataType::Is64BitType(type)
4524       ? Location::RequiresRegister()
4525       : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4526   // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4527   locations->SetOut(Location::RegisterLocation(RAX));
4528 }
4529 
4530 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4531                                       CodeGeneratorX86_64* codegen,
4532                                       Location value,
4533                                       DataType::Type type,
4534                                       Address field_addr,
4535                                       GetAndUpdateOp get_and_update_op,
4536                                       bool byte_swap) {
4537   X86_64Assembler* assembler = codegen->GetAssembler();
4538   LocationSummary* locations = invoke->GetLocations();
4539   Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
4540   Location rax_loc = locations->Out();
4541   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4542   CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
4543   DCHECK_EQ(rax.AsRegister(), RAX);
4544   bool is64Bit = DataType::Is64BitType(type);
4545 
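  // Generic read-modify-write loop: load the current field value into RAX, compute the updated
  // value in the temporary, then try to publish it with LOCK CMPXCHG. If another thread changed
  // the field in the meantime, CMPXCHG clears ZF and the loop retries from a fresh load.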
4546   NearLabel retry;
4547   __ Bind(&retry);
4548 
4549   // Load field value into RAX and copy it into a temporary register for the operation.
4550   codegen->LoadFromMemoryNoReference(type, Location::RegisterLocation(RAX), field_addr);
4551   codegen->Move(temp_loc, rax_loc);
4552   if (byte_swap) {
4553     // Byte swap the temporary, since we need to perform the operation in native endianness.
4554     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4555   }
4556 
4557   DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4558   int32_t const_value = value.IsConstant()
4559       ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4560       : 0;
4561 
4562   // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4563   switch (get_and_update_op) {
4564     case GetAndUpdateOp::kAdd:
4565       DCHECK(byte_swap);  // The non-byte-swapping path should use a faster XADD instruction.
4566       if (is64Bit) {
4567         __ addq(temp, value.AsRegister<CpuRegister>());
4568       } else if (value.IsConstant()) {
4569         __ addl(temp, Immediate(const_value));
4570       } else {
4571         __ addl(temp, value.AsRegister<CpuRegister>());
4572       }
4573       break;
4574     case GetAndUpdateOp::kBitwiseAnd:
4575       if (is64Bit) {
4576         __ andq(temp, value.AsRegister<CpuRegister>());
4577       } else if (value.IsConstant()) {
4578         __ andl(temp, Immediate(const_value));
4579       } else {
4580         __ andl(temp, value.AsRegister<CpuRegister>());
4581       }
4582       break;
4583     case GetAndUpdateOp::kBitwiseOr:
4584       if (is64Bit) {
4585         __ orq(temp, value.AsRegister<CpuRegister>());
4586       } else if (value.IsConstant()) {
4587         __ orl(temp, Immediate(const_value));
4588       } else {
4589         __ orl(temp, value.AsRegister<CpuRegister>());
4590       }
4591       break;
4592     case GetAndUpdateOp::kBitwiseXor:
4593       if (is64Bit) {
4594         __ xorq(temp, value.AsRegister<CpuRegister>());
4595       } else if (value.IsConstant()) {
4596         __ xorl(temp, Immediate(const_value));
4597       } else {
4598         __ xorl(temp, value.AsRegister<CpuRegister>());
4599       }
4600       break;
4601     default:
4602       LOG(FATAL) << "unexpected operation";
4603       UNREACHABLE();
4604   }
4605 
4606   if (byte_swap) {
4607     // RAX still contains the original value, but we need to byte swap the temporary back.
4608     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4609   }
4610 
4611   switch (type) {
4612     case DataType::Type::kBool:
4613     case DataType::Type::kUint8:
4614     case DataType::Type::kInt8:
4615       __ LockCmpxchgb(field_addr, temp);
4616       break;
4617     case DataType::Type::kUint16:
4618     case DataType::Type::kInt16:
4619       __ LockCmpxchgw(field_addr, temp);
4620       break;
4621     case DataType::Type::kInt32:
4622     case DataType::Type::kUint32:
4623       __ LockCmpxchgl(field_addr, temp);
4624       break;
4625     case DataType::Type::kInt64:
4626     case DataType::Type::kUint64:
4627       __ LockCmpxchgq(field_addr, temp);
4628       break;
4629     default:
4630       LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
4631       UNREACHABLE();
4632   }
4633 
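  // CMPXCHG sets ZF on success; on failure the field was concurrently modified, so retry.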
4634   __ j(kNotZero, &retry);
4635 
4636   // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4637   // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4638   if (byte_swap) {
4639     codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4640   }
4641 }
4642 
4643 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4644   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4645     return;
4646   }
4647 
4648   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4649   uint32_t new_value_index = number_of_arguments - 1;
4650   DataType::Type type = invoke->GetType();
4651   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4652 
4653   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4654 
4655   if (DataType::IsFloatingPointType(type)) {
4656     locations->SetOut(Location::RequiresFpuRegister());
4657     // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
4658     locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
4659     // CMPXCHG clobbers RAX.
4660     locations->AddTemp(Location::RegisterLocation(RAX));
4661     // An FP temporary to load the old value from the field and perform FP addition.
4662     locations->AddTemp(Location::RequiresFpuRegister());
4663     // A temporary to hold the new value for CMPXCHG.
4664     locations->AddTemp(Location::RequiresRegister());
4665   } else {
4666     DCHECK_NE(type, DataType::Type::kReference);
4667     // Use the same register for both the new value and output to take advantage of XADD.
4668     // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
4669     // back to GenerateVarHandleGetAndOp, which expects the output in RAX.
4670     locations->SetOut(Location::RegisterLocation(RAX));
4671     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4672     if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
4673       // For byte array views with non-native endianness we need extra BSWAP operations, so we
4674       // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In
4675       // that case we need two temporary registers: one to hold the value instead of RAX (which may
4676       // get clobbered by repeated CMPXCHG) and one for performing the operation. At compile time
4677       // we cannot distinguish this case from arrays or native-endian byte array views.
4678       locations->AddTemp(Location::RequiresRegister());
4679       locations->AddTemp(Location::RequiresRegister());
4680     }
4681   }
4682 }
4683 
4684 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
4685                                        CodeGeneratorX86_64* codegen,
4686                                        Location value,
4687                                        DataType::Type type,
4688                                        Address field_addr,
4689                                        bool byte_swap) {
4690   X86_64Assembler* assembler = codegen->GetAssembler();
4691   LocationSummary* locations = invoke->GetLocations();
4692   Location out = locations->Out();
4693   uint32_t temp_count = locations->GetTempCount();
4694 
4695   if (DataType::IsFloatingPointType(type)) {
4696     if (byte_swap) {
4697       // This code should never be executed: it handles the case of a byte array view (which is
4698       // why a byte swap is required), and varhandles for byte array views support numeric atomic
4699       // update access modes only for int and long, but not for floating-point types (see the
4700       // javadoc for java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART
4701       // varhandle implementation for byte array views treats floating-point types as numeric types
4702       // in ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it
4703       // always fails the access mode check at runtime before reaching this point. The illegal
4704       // instruction UD2 ensures that if control flow gets here by mistake, we will notice.
4705       __ ud2();
4706     }
4707 
4708     // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register
4709     // and into RAX for CMPXCHG, add the new FP value to it, move the sum to a non-FP temporary for
4710     // CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
4711     bool is64bit = (type == DataType::Type::kFloat64);
4712     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4713     XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
4714     Location rax_loc = Location::RegisterLocation(RAX);
4715     Location temp_loc = locations->GetTemp(temp_count - 1);
4716     CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4717 
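    // There is no atomic FP add instruction, so `getAndAdd` is emulated with a CMPXCHG loop: load
    // the field, add in an XMM register, and attempt to publish the sum until no concurrent update
    // intervenes.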
4718     NearLabel retry;
4719     __ Bind(&retry);
4720 
4721     // Read the value from memory into an FP register and copy it into RAX.
4722     if (is64bit) {
4723       __ movsd(fptemp, field_addr);
4724     } else {
4725       __ movss(fptemp, field_addr);
4726     }
4727     __ movd(CpuRegister(RAX), fptemp, is64bit);
4728     // If necessary, byte swap RAX and update the value in FP register to also be byte-swapped.
4729     if (byte_swap) {
4730       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4731       __ movd(fptemp, CpuRegister(RAX), is64bit);
4732     }
4733     // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
4734     if (is64bit) {
4735       __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
4736     } else {
4737       __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
4738     }
4739     __ movd(temp, fptemp, is64bit);
4740     // If necessary, byte swap RAX before CMPXCHG and the temporary before copying to FP register.
4741     if (byte_swap) {
4742       codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
4743       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4744     }
4745     if (is64bit) {
4746       __ LockCmpxchgq(field_addr, temp);
4747     } else {
4748       __ LockCmpxchgl(field_addr, temp);
4749     }
4750 
4751     __ j(kNotZero, &retry);
4752 
4753     // The old value is in RAX, byte swap if necessary.
4754     if (byte_swap) {
4755       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4756     }
4757     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
4758   } else {
4759     if (byte_swap) {
4760       // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
4761       // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
4762       // implementation that is also used for bitwise operations.
4763       // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
4764       DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
4765       Location temp = locations->GetTemp(temp_count - 2);
4766       codegen->Move(temp, value);
4767       GenerateVarHandleGetAndOp(
4768           invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
4769     } else {
4770       // `getAndAdd` for integral types: atomically exchange the new value with the field and add
4771       // the old value to the field. The output register is the same as the one holding the new
4772       // value; sign-extend / zero-extend as needed.
4773       CpuRegister valreg = value.AsRegister<CpuRegister>();
4774       DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
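      // LOCK XADD stores old + new into the field and leaves the old field value in `valreg`.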
4775       switch (type) {
4776         case DataType::Type::kBool:
4777         case DataType::Type::kUint8:
4778           __ LockXaddb(field_addr, valreg);
4779           __ movzxb(valreg, valreg);
4780           break;
4781         case DataType::Type::kInt8:
4782           __ LockXaddb(field_addr, valreg);
4783           __ movsxb(valreg, valreg);
4784           break;
4785         case DataType::Type::kUint16:
4786           __ LockXaddw(field_addr, valreg);
4787           __ movzxw(valreg, valreg);
4788           break;
4789         case DataType::Type::kInt16:
4790           __ LockXaddw(field_addr, valreg);
4791           __ movsxw(valreg, valreg);
4792           break;
4793         case DataType::Type::kInt32:
4794         case DataType::Type::kUint32:
4795           __ LockXaddl(field_addr, valreg);
4796           break;
4797         case DataType::Type::kInt64:
4798         case DataType::Type::kUint64:
4799           __ LockXaddq(field_addr, valreg);
4800           break;
4801         default:
4802           LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
4803           UNREACHABLE();
4804       }
4805     }
4806   }
4807 }
4808 
4809 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4810                                           CodeGeneratorX86_64* codegen,
4811                                           GetAndUpdateOp get_and_update_op,
4812                                           bool need_any_store_barrier,
4813                                           bool need_any_any_barrier,
4814                                           bool byte_swap = false) {
4815   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4816 
4817   X86_64Assembler* assembler = codegen->GetAssembler();
4818   LocationSummary* locations = invoke->GetLocations();
4819 
4820   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4821   Location value = locations->InAt(number_of_arguments - 1);
4822   DataType::Type type = invoke->GetType();
4823 
4824   VarHandleSlowPathX86_64* slow_path = nullptr;
4825   VarHandleTarget target = GetVarHandleTarget(invoke);
4826   if (!byte_swap) {
4827     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4828     GenerateVarHandleTarget(invoke, target, codegen);
4829     if (slow_path != nullptr) {
4830       slow_path->SetGetAndUpdateOp(get_and_update_op);
4831       slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
4832       slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
4833       __ Bind(slow_path->GetNativeByteOrderLabel());
4834     }
4835   }
4836 
4837   CpuRegister ref(target.object);
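  // `target.offset` is a register holding the byte offset of the accessed field (or array
  // element) within `target.object`, as resolved by GenerateVarHandleTarget or by the byte array
  // view slow path below.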
4838   Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
4839 
4840   if (need_any_store_barrier) {
4841     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4842   }
4843 
4844   switch (get_and_update_op) {
4845     case GetAndUpdateOp::kSet:
4846       GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
4847       break;
4848     case GetAndUpdateOp::kAdd:
4849       GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
4850       break;
4851     case GetAndUpdateOp::kBitwiseAnd:
4852     case GetAndUpdateOp::kBitwiseOr:
4853     case GetAndUpdateOp::kBitwiseXor:
4854       GenerateVarHandleGetAndOp(
4855           invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
4856       break;
4857   }
4858 
4859   if (need_any_any_barrier) {
4860     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4861   }
4862 
4863   if (slow_path != nullptr) {
4864     DCHECK(!byte_swap);
4865     __ Bind(slow_path->GetExitLabel());
4866   }
4867 }
4868 
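// The visitors below map each VarHandle get-and-update access mode to the barrier flags of
// GenerateVarHandleGetAndUpdate: the plain (volatile) forms request both kAnyStore and kAnyAny
// barriers, the Acquire forms request none, and the Release forms request only kAnyStore (see the
// per-mode comments below for the semantics each mode combines).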
4869 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4870   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4871 }
4872 
4873 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4874   // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4875   GenerateVarHandleGetAndUpdate(invoke,
4876                                 codegen_,
4877                                 GetAndUpdateOp::kSet,
4878                                 /*need_any_store_barrier=*/ true,
4879                                 /*need_any_any_barrier=*/ true);
4880 }
4881 
4882 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4883   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4884 }
4885 
4886 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4887   // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4888   GenerateVarHandleGetAndUpdate(invoke,
4889                                 codegen_,
4890                                 GetAndUpdateOp::kSet,
4891                                 /*need_any_store_barrier=*/ false,
4892                                 /*need_any_any_barrier=*/ false);
4893 }
4894 
4895 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4896   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4897 }
4898 
4899 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4900   // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4901   GenerateVarHandleGetAndUpdate(invoke,
4902                                 codegen_,
4903                                 GetAndUpdateOp::kSet,
4904                                 /*need_any_store_barrier=*/ true,
4905                                 /*need_any_any_barrier=*/ false);
4906 }
4907 
4908 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4909   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4910 }
4911 
4912 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4913   // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4914   GenerateVarHandleGetAndUpdate(invoke,
4915                                 codegen_,
4916                                 GetAndUpdateOp::kAdd,
4917                                 /*need_any_store_barrier=*/ true,
4918                                 /*need_any_any_barrier=*/ true);
4919 }
4920 
4921 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4922   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4923 }
4924 
4925 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4926   // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4927   GenerateVarHandleGetAndUpdate(invoke,
4928                                 codegen_,
4929                                 GetAndUpdateOp::kAdd,
4930                                 /*need_any_store_barrier=*/ false,
4931                                 /*need_any_any_barrier=*/ false);
4932 }
4933 
4934 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4935   CreateVarHandleGetAndAddLocations(invoke, codegen_);
4936 }
4937 
4938 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4939   // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4940   GenerateVarHandleGetAndUpdate(invoke,
4941                                 codegen_,
4942                                 GetAndUpdateOp::kAdd,
4943                                 /*need_any_store_barrier=*/ true,
4944                                 /*need_any_any_barrier=*/ false);
4945 }
4946 
4947 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4948   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4949 }
4950 
4951 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4952   // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4953   GenerateVarHandleGetAndUpdate(invoke,
4954                                 codegen_,
4955                                 GetAndUpdateOp::kBitwiseAnd,
4956                                 /*need_any_store_barrier=*/ true,
4957                                 /*need_any_any_barrier=*/ true);
4958 }
4959 
4960 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4961   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4962 }
4963 
4964 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4965   // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4966   GenerateVarHandleGetAndUpdate(invoke,
4967                                 codegen_,
4968                                 GetAndUpdateOp::kBitwiseAnd,
4969                                 /*need_any_store_barrier=*/ false,
4970                                 /*need_any_any_barrier=*/ false);
4971 }
4972 
4973 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4974   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4975 }
4976 
4977 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4978   // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4979   GenerateVarHandleGetAndUpdate(invoke,
4980                                 codegen_,
4981                                 GetAndUpdateOp::kBitwiseAnd,
4982                                 /*need_any_store_barrier=*/ true,
4983                                 /*need_any_any_barrier=*/ false);
4984 }
4985 
4986 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4987   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4988 }
4989 
4990 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4991   // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4992   GenerateVarHandleGetAndUpdate(invoke,
4993                                 codegen_,
4994                                 GetAndUpdateOp::kBitwiseOr,
4995                                 /*need_any_store_barrier=*/ true,
4996                                 /*need_any_any_barrier=*/ true);
4997 }
4998 
4999 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5000   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5001 }
5002 
5003 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5004   // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5005   GenerateVarHandleGetAndUpdate(invoke,
5006                                 codegen_,
5007                                 GetAndUpdateOp::kBitwiseOr,
5008                                 /*need_any_store_barrier=*/ false,
5009                                 /*need_any_any_barrier=*/ false);
5010 }
5011 
5012 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5013   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5014 }
5015 
5016 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5017   // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5018   GenerateVarHandleGetAndUpdate(invoke,
5019                                 codegen_,
5020                                 GetAndUpdateOp::kBitwiseOr,
5021                                 /*need_any_store_barrier=*/ true,
5022                                 /*need_any_any_barrier=*/ false);
5023 }
5024 
5025 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5026   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5027 }
5028 
5029 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5030   // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5031   GenerateVarHandleGetAndUpdate(invoke,
5032                                 codegen_,
5033                                 GetAndUpdateOp::kBitwiseXor,
5034                                 /*need_any_store_barrier=*/ true,
5035                                 /*need_any_any_barrier=*/ true);
5036 }
5037 
5038 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5039   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5040 }
5041 
5042 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5043   // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5044   GenerateVarHandleGetAndUpdate(invoke,
5045                                 codegen_,
5046                                 GetAndUpdateOp::kBitwiseXor,
5047                                 /*need_any_store_barrier=*/ false,
5048                                 /*need_any_any_barrier=*/ false);
5049 }
5050 
5051 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5052   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5053 }
5054 
5055 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5056   // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5057   GenerateVarHandleGetAndUpdate(invoke,
5058                                 codegen_,
5059                                 GetAndUpdateOp::kBitwiseXor,
5060                                 /*need_any_store_barrier=*/ true,
5061                                 /*need_any_any_barrier=*/ false);
5062 }
5063 
5064 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
5065   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5066   X86_64Assembler* assembler = codegen->GetAssembler();
5067 
5068   HInvoke* invoke = GetInvoke();
5069   LocationSummary* locations = invoke->GetLocations();
5070   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5071   DataType::Type value_type =
5072       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5073   DCHECK_NE(value_type, DataType::Type::kReference);
5074   size_t size = DataType::Size(value_type);
5075   DCHECK_GT(size, 1u);
5076 
5077   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
5078   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
5079   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
5080   CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
5081 
5082   MemberOffset class_offset = mirror::Object::ClassOffset();
5083   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5084   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5085   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5086 
5087   VarHandleTarget target = GetVarHandleTarget(invoke);
5088 
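  // Out-of-line handling for the byte array view case: verify that `varhandle` is actually a
  // ByteArrayViewVarHandle, re-check bounds and alignment for a `size`-byte access, compute the
  // target offset, and then either return to the native-byte-order main path or emit the
  // byte-swapping variant of the access below.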
5089   __ Bind(GetByteArrayViewCheckLabel());
5090 
5091   // The main path checked that coordinateType0 is an array class matching the class of the
5092   // actual coordinate argument, but its component type does not match the value type.
5093   // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5094   codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5095   assembler->MaybePoisonHeapReference(temp);
5096   __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
5097   __ j(kNotEqual, GetEntryLabel());
5098 
5099   // Check for array index out of bounds.
5100   __ movl(temp, Address(object, array_length_offset.Int32Value()));
5101   // SUB sets flags in the same way as CMP.
5102   __ subl(temp, index);
5103   __ j(kBelowEqual, GetEntryLabel());
5104   // The remaining length (array length minus index) must be at least the `value_type` size.
5105   __ cmpl(temp, Immediate(size));
5106   __ j(kBelow, GetEntryLabel());
5107 
5108   // Construct the target.
5109   __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
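  // `target.offset` now holds `data_offset + index`, i.e. the byte offset of the accessed element
  // within the byte array; the main path and the byte-swapping code below address the field as
  // `object + offset`.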
5110 
5111   // Alignment check. For unaligned access, go to the runtime.
5112   DCHECK(IsPowerOfTwo(size));
5113   __ testl(CpuRegister(target.offset), Immediate(size - 1u));
5114   __ j(kNotZero, GetEntryLabel());
5115 
5116   // Byte order check. For native byte order return to the main path.
5117   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5118       IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5119     // There is no reason to differentiate between native byte order and byte-swap
5120     // for setting a zero bit pattern. Just return to the main path.
5121     __ jmp(GetNativeByteOrderLabel());
5122     return;
5123   }
5124   __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
5125   __ j(kNotEqual, GetNativeByteOrderLabel());
5126 
5127   switch (access_mode_template) {
5128     case mirror::VarHandle::AccessModeTemplate::kGet:
5129       GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
5130       break;
5131     case mirror::VarHandle::AccessModeTemplate::kSet:
5132       GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
5133       break;
5134     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5135       GenerateVarHandleCompareAndSetOrExchange(
5136           invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
5137       break;
5138     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5139       GenerateVarHandleCompareAndSetOrExchange(
5140           invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
5141       break;
5142     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5143       GenerateVarHandleGetAndUpdate(invoke,
5144                                     codegen,
5145                                     get_and_update_op_,
5146                                     need_any_store_barrier_,
5147                                     need_any_any_barrier_,
5148                                     /*byte_swap=*/ true);
5149       break;
5150   }
5151 
5152   __ jmp(GetExitLabel());
5153 }
5154 
5155 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
5156 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
5157 #undef MARK_UNIMPLEMENTED
5158 
5159 UNREACHABLE_INTRINSICS(X86_64)
5160 
5161 #undef __
5162 
5163 }  // namespace x86_64
5164 }  // namespace art
5165