/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

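// Helpers for the Float/Double raw-bits intrinsics below: they move the raw
// bits between a general-purpose register and an XMM register, with `is64bit`
// selecting the 64-bit (quadword) form of the move.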
static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
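      // bswap reverses all four bytes of the 32-bit register; the arithmetic
      // shift then moves the swapped halfword back into the low 16 bits while
      // sign-extending the result.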
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    case DataType::Type::kInt64:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
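  // The low two bits of the ROUNDSD immediate select the rounding mode:
  // 0 = round to nearest (even), 1 = round toward negative infinity (floor),
  // 2 = round toward positive infinity (ceil), 3 = truncate toward zero.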
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit= */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit= */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be > 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

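// Emits the range checks for one System.arraycopy operand: verifies that
// `pos` is non-negative and that `length(input) - pos >= length`, branching
// to `slow_path` on failure. When `length_is_input_length` is set, the copy
// covers the whole input, so only `pos == 0` can succeed.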
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
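  // REP MOVSW copies RCX 16-bit units from [RSI] to [RDI], advancing both
  // pointers as it goes (forward copy, direction flag clear).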
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        DataType::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, element_size * constant + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (dst_pos.IsConstant()) {
    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
  } else {
    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_end, Address(src_base, element_size * constant));
  } else {
    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Location temp1_loc = locations->GetTemp(0);
  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
  Location temp2_loc = locations->GetTemp(1);
  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
  Location temp3_loc = locations->GetTemp(2);
  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
  Location TMP_loc = Location::RegisterLocation(TMP);

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    bool did_unpoison = false;
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
      // Register `temp1` is not trashed by the read barrier emitted
      // by GenerateFieldLoadWithBakerReadBarrier below, as that
      // method produces a call to a ReadBarrierMarkRegX entry point,
      // which saves all potentially live registers, including
      // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
      // If heap poisoning is enabled, `temp1` and `temp2` have been
      // unpoisoned by the previous calls to
      // GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      // /* HeapReference<Class> */ temp2 = src->klass_
      __ movl(temp2, Address(src, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
          !optimizations.GetSourceIsNonPrimitiveArray()) {
        // One or two of the references need to be unpoisoned. Unpoison them
        // both to make the identity check valid.
        __ MaybeUnpoisonHeapReference(temp1);
        __ MaybeUnpoisonHeapReference(temp2);
        did_unpoison = true;
      }
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // For the same reason given earlier, `temp1` is not trashed by the
        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      } else {
        if (!did_unpoison) {
          __ MaybeUnpoisonHeapReference(temp1);
        }
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // No need to unpoison the following heap reference load, as
        // we're comparing against null.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      }
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      // No need to unpoison `TMP` now, as we're comparing against null.
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    }
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);

  // Compute base source address, base destination address, and end
  // source address in `temp1`, `temp2` and `temp3` respectively.
  GenSystemArrayCopyAddresses(
      GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       do {
    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
    //       } while (src_ptr != end_ptr)
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.

    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ rcx = str->klass_
    __ movl(rcx, Address(str, class_offset));
    // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(rcx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(rcx, Address(str, count_offset));
1239   // Check if lengths and compression flags are equal, return false if they're not.
1240   // Two identical strings will always have same compression style since
1241   // compression style is decided on alloc.
1242   __ cmpl(rcx, Address(arg, count_offset));
1243   __ j(kNotEqual, &return_false);
1244   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1245   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1246                 "Expecting 0=compressed, 1=uncompressed");
1247   __ jrcxz(&return_true);
1248 
1249   if (mirror::kUseStringCompression) {
1250     NearLabel string_uncompressed;
1251     // Extract the length and branch on whether both strings are compressed or both uncompressed.
1252     // Strings with different compression styles were already rejected above.
1253     __ shrl(rcx, Immediate(1));
1254     __ j(kCarrySet, &string_uncompressed);
1255     // Divide string length by 2, rounding up, and continue as if uncompressed.
1256     // Merge clearing the compression flag with +1 for rounding.
1257     __ addl(rcx, Immediate(1));
1258     __ shrl(rcx, Immediate(1));
1259     __ Bind(&string_uncompressed);
1260   }
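  // Illustrative example (added note, not part of the original source): for an uncompressed
  // "abcd", count == (4 << 1) | 1 == 9; the shift above leaves rcx == 4 with CF set, so the
  // rounding step is skipped. For a compressed "abcde", count == 10, the shift leaves rcx == 5
  // with CF clear, and (5 + 1) >> 1 == 3 char-sized (two-byte) units cover the five bytes.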
1261   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1262   __ leal(rsi, Address(str, value_offset));
1263   __ leal(rdi, Address(arg, value_offset));
1264 
1265   // Divide string length by 4 and adjust for lengths not divisible by 4.
1266   __ addl(rcx, Immediate(3));
1267   __ shrl(rcx, Immediate(2));
1268 
1269   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1270   // or 8 characters (compressed) at a time.
1271   DCHECK_ALIGNED(value_offset, 8);
1272   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
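  // Added note: repe cmpsq compares the 8-byte quads at [RSI] and [RDI], advances both
  // pointers and decrements RCX, repeating while the quads are equal and RCX != 0. Any
  // trailing bytes beyond the string value compare equal because the value is zero padded
  // up to the object alignment (see the assertions above).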
1273 
1274   // Loop to compare strings four characters at a time starting at the beginning of the string.
1275   __ repe_cmpsq();
1276   // If strings are not equal, zero flag will be cleared.
1277   __ j(kNotEqual, &return_false);
1278 
1279   // Return true and exit the function.
1280   // If loop does not result in returning false, we return true.
1281   __ Bind(&return_true);
1282   __ movl(rsi, Immediate(1));
1283   __ jmp(&end);
1284 
1285   // Return false and exit the function.
1286   __ Bind(&return_false);
1287   __ xorl(rsi, rsi);
1288   __ Bind(&end);
1289 }
1290 
1291 static void CreateStringIndexOfLocations(HInvoke* invoke,
1292                                          ArenaAllocator* allocator,
1293                                          bool start_at_zero) {
1294   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1295                                                                LocationSummary::kCallOnSlowPath,
1296                                                                kIntrinsified);
1297   // The data needs to be in RDI for scasw, so request that the string be placed there anyway.
1298   locations->SetInAt(0, Location::RegisterLocation(RDI));
1299   // If we look for a constant char, we still have to copy it into RAX, so just ask the
1300   // allocator to do that anyway. We can still detect the constant case by checking the
1301   // instruction's parameter explicitly.
1302   // Note: This works as we don't clobber RAX anywhere.
1303   locations->SetInAt(1, Location::RegisterLocation(RAX));
1304   if (!start_at_zero) {
1305     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1306   }
1307   // As we clobber RDI during execution anyway, also use it as the output.
1308   locations->SetOut(Location::SameAsFirstInput());
1309 
1310   // repne scasw uses RCX as the counter.
1311   locations->AddTemp(Location::RegisterLocation(RCX));
1312   // Need another temporary to be able to compute the result.
1313   locations->AddTemp(Location::RequiresRegister());
1314 }
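// Added note: the fixed registers requested above follow the x86 string-instruction contract
// used by the code generator below: repne scasw scans the data at [RDI] for the value in AX,
// with RCX holding the remaining element count.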
1315 
1316 static void GenerateStringIndexOf(HInvoke* invoke,
1317                                   X86_64Assembler* assembler,
1318                                   CodeGeneratorX86_64* codegen,
1319                                   bool start_at_zero) {
1320   LocationSummary* locations = invoke->GetLocations();
1321 
1322   // Note that the null check must have been done earlier.
1323   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1324 
1325   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1326   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1327   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1328   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1329   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1330 
1331   // Check our assumptions for registers.
1332   DCHECK_EQ(string_obj.AsRegister(), RDI);
1333   DCHECK_EQ(search_value.AsRegister(), RAX);
1334   DCHECK_EQ(counter.AsRegister(), RCX);
1335   DCHECK_EQ(out.AsRegister(), RDI);
1336 
1337   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1338   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1339   SlowPathCode* slow_path = nullptr;
1340   HInstruction* code_point = invoke->InputAt(1);
1341   if (code_point->IsIntConstant()) {
1342     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1343             std::numeric_limits<uint16_t>::max()) {
1344       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1345       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1346       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1347       codegen->AddSlowPath(slow_path);
1348       __ jmp(slow_path->GetEntryLabel());
1349       __ Bind(slow_path->GetExitLabel());
1350       return;
1351     }
1352   } else if (code_point->GetType() != DataType::Type::kUint16) {
1353     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1354     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1355     codegen->AddSlowPath(slow_path);
1356     __ j(kAbove, slow_path->GetEntryLabel());
1357   }
1358 
1359   // From here down, we know that we are looking for a char that fits in
1360   // 16 bits (uncompressed) or 8 bits (compressed).
1361   // Location of reference to data array within the String object.
1362   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1363   // Location of count within the String object.
1364   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1365 
1366   // Load the count field of the string containing the length and compression flag.
1367   __ movl(string_length, Address(string_obj, count_offset));
1368 
1369   // Do a zero-length check. Even with string compression `count == 0` means empty.
1370   // TODO: Support jecxz.
1371   NearLabel not_found_label;
1372   __ testl(string_length, string_length);
1373   __ j(kEqual, &not_found_label);
1374 
1375   if (mirror::kUseStringCompression) {
1376     // Use TMP to keep string_length_flagged.
1377     __ movl(CpuRegister(TMP), string_length);
1378     // Mask out first bit used as compression flag.
1379     __ shrl(string_length, Immediate(1));
1380   }
1381 
1382   if (start_at_zero) {
1383     // Number of chars to scan is the same as the string length.
1384     __ movl(counter, string_length);
1385     // Move to the start of the string.
1386     __ addq(string_obj, Immediate(value_offset));
1387   } else {
1388     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1389 
1390     // Do a start_index check.
1391     __ cmpl(start_index, string_length);
1392     __ j(kGreaterEqual, &not_found_label);
1393 
1394     // Ensure we have a start_index >= 0.
1395     __ xorl(counter, counter);
1396     __ cmpl(start_index, Immediate(0));
1397     __ cmov(kGreater, counter, start_index, /* is64bit= */ false);  // 32-bit copy is enough.
1398 
1399     if (mirror::kUseStringCompression) {
1400       NearLabel modify_counter, offset_uncompressed_label;
1401       __ testl(CpuRegister(TMP), Immediate(1));
1402       __ j(kNotZero, &offset_uncompressed_label);
1403       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1404       __ jmp(&modify_counter);
1405       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1406       __ Bind(&offset_uncompressed_label);
1407       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1408       __ Bind(&modify_counter);
1409     } else {
1410       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1411     }
1412     // Now update the work counter (RCX): it becomes string.length - start_index.
1413     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1414     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1415   }
1416 
1417   if (mirror::kUseStringCompression) {
1418     NearLabel uncompressed_string_comparison;
1419     NearLabel comparison_done;
1420     __ testl(CpuRegister(TMP), Immediate(1));
1421     __ j(kNotZero, &uncompressed_string_comparison);
1422     // Check if RAX (search_value) is ASCII.
1423     __ cmpl(search_value, Immediate(127));
1424     __ j(kGreater, &not_found_label);
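    // Added note: the comparison above relies on compressed strings holding only ASCII data,
    // so a search value above 127 can never occur in a compressed string and we can report
    // "not found" without scanning.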
1425     // Comparing byte-per-byte.
1426     __ repne_scasb();
1427     __ jmp(&comparison_done);
1428     // Everything is set up for repne scasw:
1429     //   * Comparison address in RDI.
1430     //   * Counter in ECX.
1431     __ Bind(&uncompressed_string_comparison);
1432     __ repne_scasw();
1433     __ Bind(&comparison_done);
1434   } else {
1435     __ repne_scasw();
1436   }
1437   // Did we find a match?
1438   __ j(kNotEqual, &not_found_label);
1439 
1440   // Yes, we matched.  Compute the index of the result.
1441   __ subl(string_length, counter);
1442   __ leal(out, Address(string_length, -1));
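  // Added reasoning note: repne scasw consumes the matching character before stopping, so
  // after the scan RCX == initial_count - (match_offset + 1). With initial_count ==
  // string_length - scan_start and match_offset == match_index - scan_start, the subl above
  // yields match_index + 1, and the leal subtracts the final 1. This holds for both the
  // IndexOf (scan_start == 0) and IndexOfAfter cases.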
1443 
1444   NearLabel done;
1445   __ jmp(&done);
1446 
1447   // Failed to match; return -1.
1448   __ Bind(&not_found_label);
1449   __ movl(out, Immediate(-1));
1450 
1451   // And join up at the end.
1452   __ Bind(&done);
1453   if (slow_path != nullptr) {
1454     __ Bind(slow_path->GetExitLabel());
1455   }
1456 }
1457 
1458 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1459   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1460 }
1461 
1462 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1463   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1464 }
1465 
1466 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1467   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1468 }
1469 
1470 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1471   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1472 }
1473 
1474 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1475   LocationSummary* locations = new (allocator_) LocationSummary(
1476       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1477   InvokeRuntimeCallingConvention calling_convention;
1478   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1479   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1480   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1481   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1482   locations->SetOut(Location::RegisterLocation(RAX));
1483 }
1484 
1485 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1486   X86_64Assembler* assembler = GetAssembler();
1487   LocationSummary* locations = invoke->GetLocations();
1488 
1489   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1490   __ testl(byte_array, byte_array);
1491   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1492   codegen_->AddSlowPath(slow_path);
1493   __ j(kEqual, slow_path->GetEntryLabel());
1494 
1495   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1496   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1497   __ Bind(slow_path->GetExitLabel());
1498 }
1499 
1500 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1501   LocationSummary* locations =
1502       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1503   InvokeRuntimeCallingConvention calling_convention;
1504   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1505   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1506   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1507   locations->SetOut(Location::RegisterLocation(RAX));
1508 }
1509 
1510 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1511   // No need to emit code checking whether `locations->InAt(2)` is a null
1512   // pointer, as callers of the native method
1513   //
1514   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1515   //
1516   // all include a null check on `data` before calling that method.
1517   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1518   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1519 }
1520 
1521 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1522   LocationSummary* locations = new (allocator_) LocationSummary(
1523       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1524   InvokeRuntimeCallingConvention calling_convention;
1525   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1526   locations->SetOut(Location::RegisterLocation(RAX));
1527 }
1528 
1529 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1530   X86_64Assembler* assembler = GetAssembler();
1531   LocationSummary* locations = invoke->GetLocations();
1532 
1533   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1534   __ testl(string_to_copy, string_to_copy);
1535   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1536   codegen_->AddSlowPath(slow_path);
1537   __ j(kEqual, slow_path->GetEntryLabel());
1538 
1539   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1540   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1541   __ Bind(slow_path->GetExitLabel());
1542 }
1543 
1544 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1545   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1546   LocationSummary* locations =
1547       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1548   locations->SetInAt(0, Location::RequiresRegister());
1549   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1550   locations->SetInAt(2, Location::RequiresRegister());
1551   locations->SetInAt(3, Location::RequiresRegister());
1552   locations->SetInAt(4, Location::RequiresRegister());
1553 
1554   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1555   locations->AddTemp(Location::RegisterLocation(RSI));
1556   locations->AddTemp(Location::RegisterLocation(RDI));
1557   locations->AddTemp(Location::RegisterLocation(RCX));
1558 }
1559 
1560 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1561   X86_64Assembler* assembler = GetAssembler();
1562   LocationSummary* locations = invoke->GetLocations();
1563 
1564   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1565   // Location of data in char array buffer.
1566   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1567   // Location of char array data in string.
1568   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1569 
1570   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1571   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1572   Location srcBegin = locations->InAt(1);
1573   int srcBegin_value =
1574     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1575   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1576   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1577   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1578 
1579   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1580   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1581   DCHECK_EQ(char_size, 2u);
1582 
1583   NearLabel done;
1584   // Compute the number of chars (words) to move.
1585   __ movl(CpuRegister(RCX), srcEnd);
1586   if (srcBegin.IsConstant()) {
1587     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1588   } else {
1589     DCHECK(srcBegin.IsRegister());
1590     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1591   }
1592   if (mirror::kUseStringCompression) {
1593     NearLabel copy_uncompressed, copy_loop;
1594     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1595     DCHECK_EQ(c_char_size, 1u);
1596     // Location of count in string.
1597     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1598 
1599     __ testl(Address(obj, count_offset), Immediate(1));
1600     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1601                   "Expecting 0=compressed, 1=uncompressed");
1602     __ j(kNotZero, &copy_uncompressed);
1603     // Compute the address of the source string by adding the number of chars from
1604     // the source beginning to the value offset of a string.
1605     __ leaq(CpuRegister(RSI),
1606             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1607     // Compute the address of the destination buffer for the byte-to-char copy loop below.
1608     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1609 
1610     __ Bind(&copy_loop);
1611     __ jrcxz(&done);
1612     // Use TMP as temporary (convert byte from RSI to word).
1613     // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1614     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1615     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1616     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1617     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1618     // TODO: Add support for LOOP to X86_64Assembler.
1619     __ subl(CpuRegister(RCX), Immediate(1));
1620     __ jmp(&copy_loop);
1621 
1622     __ Bind(&copy_uncompressed);
1623   }
1624 
1625   __ leaq(CpuRegister(RSI),
1626           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1627   // Compute the address of the destination buffer.
1628   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1629   // Do the move.
1630   __ rep_movsw();
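  // Added note: rep movsw copies RCX 16-bit words from [RSI] to [RDI], advancing both
  // pointers, so the whole uncompressed range is moved with a single string instruction.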
1631 
1632   __ Bind(&done);
1633 }
1634 
1635 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1636   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1637   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1638   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1639   // to avoid a SIGBUS.
1640   switch (size) {
1641     case DataType::Type::kInt8:
1642       __ movsxb(out, Address(address, 0));
1643       break;
1644     case DataType::Type::kInt16:
1645       __ movsxw(out, Address(address, 0));
1646       break;
1647     case DataType::Type::kInt32:
1648       __ movl(out, Address(address, 0));
1649       break;
1650     case DataType::Type::kInt64:
1651       __ movq(out, Address(address, 0));
1652       break;
1653     default:
1654       LOG(FATAL) << "Type not recognized for peek: " << size;
1655       UNREACHABLE();
1656   }
1657 }
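// Added note (assumption): these Memory peek/poke intrinsics back libcore's
// libcore.io.Memory.peek*/poke* native methods, which read or write a raw value at a native
// address passed in as a Java long.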
1658 
1659 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1660   CreateIntToIntLocations(allocator_, invoke);
1661 }
1662 
1663 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1664   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1665 }
1666 
1667 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1668   CreateIntToIntLocations(allocator_, invoke);
1669 }
1670 
1671 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1672   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1673 }
1674 
1675 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1676   CreateIntToIntLocations(allocator_, invoke);
1677 }
1678 
1679 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1680   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1681 }
1682 
1683 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1684   CreateIntToIntLocations(allocator_, invoke);
1685 }
1686 
1687 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1688   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1689 }
1690 
1691 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1692   LocationSummary* locations =
1693       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1694   locations->SetInAt(0, Location::RequiresRegister());
1695   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1696 }
1697 
1698 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1699   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1700   Location value = locations->InAt(1);
1701   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1702   // to avoid a SIGBUS.
1703   switch (size) {
1704     case DataType::Type::kInt8:
1705       if (value.IsConstant()) {
1706         __ movb(Address(address, 0),
1707                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1708       } else {
1709         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1710       }
1711       break;
1712     case DataType::Type::kInt16:
1713       if (value.IsConstant()) {
1714         __ movw(Address(address, 0),
1715                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1716       } else {
1717         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1718       }
1719       break;
1720     case DataType::Type::kInt32:
1721       if (value.IsConstant()) {
1722         __ movl(Address(address, 0),
1723                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1724       } else {
1725         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1726       }
1727       break;
1728     case DataType::Type::kInt64:
1729       if (value.IsConstant()) {
1730         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1731         DCHECK(IsInt<32>(v));
1732         int32_t v_32 = v;
1733         __ movq(Address(address, 0), Immediate(v_32));
1734       } else {
1735         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1736       }
1737       break;
1738     default:
1739       LOG(FATAL) << "Type not recognized for poke: " << size;
1740       UNREACHABLE();
1741   }
1742 }
1743 
1744 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1745   CreateIntIntToVoidLocations(allocator_, invoke);
1746 }
1747 
1748 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1749   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1750 }
1751 
1752 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1753   CreateIntIntToVoidLocations(allocator_, invoke);
1754 }
1755 
1756 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1757   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1758 }
1759 
1760 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1761   CreateIntIntToVoidLocations(allocator_, invoke);
1762 }
1763 
1764 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1765   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1766 }
1767 
1768 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1769   CreateIntIntToVoidLocations(allocator_, invoke);
1770 }
1771 
1772 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1773   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1774 }
1775 
1776 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1777   LocationSummary* locations =
1778       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1779   locations->SetOut(Location::RequiresRegister());
1780 }
1781 
1782 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1783   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1784   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1785                                                     /* no_rip= */ true));
1786 }
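// Added note: on x86-64 the GS segment base points at the current ART Thread, so the
// gs-prefixed absolute load above reads the java.lang.Thread peer reference straight out of
// thread-local storage without a runtime call.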
1787 
1788 static void GenUnsafeGet(HInvoke* invoke,
1789                          DataType::Type type,
1790                          bool is_volatile ATTRIBUTE_UNUSED,
1791                          CodeGeneratorX86_64* codegen) {
1792   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1793   LocationSummary* locations = invoke->GetLocations();
1794   Location base_loc = locations->InAt(1);
1795   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1796   Location offset_loc = locations->InAt(2);
1797   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1798   Location output_loc = locations->Out();
1799   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1800 
1801   switch (type) {
1802     case DataType::Type::kInt32:
1803       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1804       break;
1805 
1806     case DataType::Type::kReference: {
1807       if (kEmitCompilerReadBarrier) {
1808         if (kUseBakerReadBarrier) {
1809           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1810           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1811               invoke, output_loc, base, src, /* needs_null_check= */ false);
1812         } else {
1813           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1814           codegen->GenerateReadBarrierSlow(
1815               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1816         }
1817       } else {
1818         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1819         __ MaybeUnpoisonHeapReference(output);
1820       }
1821       break;
1822     }
1823 
1824     case DataType::Type::kInt64:
1825       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1826       break;
1827 
1828     default:
1829       LOG(FATAL) << "Unsupported op size " << type;
1830       UNREACHABLE();
1831   }
1832 }
1833 
1834 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1835   bool can_call = kEmitCompilerReadBarrier &&
1836       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1837        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1838   LocationSummary* locations =
1839       new (allocator) LocationSummary(invoke,
1840                                       can_call
1841                                           ? LocationSummary::kCallOnSlowPath
1842                                           : LocationSummary::kNoCall,
1843                                       kIntrinsified);
1844   if (can_call && kUseBakerReadBarrier) {
1845     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1846   }
1847   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1848   locations->SetInAt(1, Location::RequiresRegister());
1849   locations->SetInAt(2, Location::RequiresRegister());
1850   locations->SetOut(Location::RequiresRegister(),
1851                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1852 }
1853 
1854 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1855   CreateIntIntIntToIntLocations(allocator_, invoke);
1856 }
1857 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1858   CreateIntIntIntToIntLocations(allocator_, invoke);
1859 }
1860 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1861   CreateIntIntIntToIntLocations(allocator_, invoke);
1862 }
1863 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1864   CreateIntIntIntToIntLocations(allocator_, invoke);
1865 }
1866 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1867   CreateIntIntIntToIntLocations(allocator_, invoke);
1868 }
1869 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1870   CreateIntIntIntToIntLocations(allocator_, invoke);
1871 }
1872 
1873 
1874 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1875   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1876 }
1877 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1878   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1879 }
1880 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1881   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1882 }
1883 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1884   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1885 }
1886 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1887   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1888 }
1889 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1890   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1891 }
1892 
1893 
1894 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1895                                                        DataType::Type type,
1896                                                        HInvoke* invoke) {
1897   LocationSummary* locations =
1898       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1899   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1900   locations->SetInAt(1, Location::RequiresRegister());
1901   locations->SetInAt(2, Location::RequiresRegister());
1902   locations->SetInAt(3, Location::RequiresRegister());
1903   if (type == DataType::Type::kReference) {
1904     // Need temp registers for card-marking.
1905     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1906     locations->AddTemp(Location::RequiresRegister());
1907   }
1908 }
1909 
1910 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
1911   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1912 }
1913 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
1914   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1915 }
1916 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
1917   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1918 }
1919 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
1920   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1921 }
1922 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1923   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1924 }
1925 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1926   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1927 }
1928 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
1929   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1930 }
1931 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1932   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1933 }
1934 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1935   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1936 }
1937 
1938 // Ordered puts need no extra code here: they only require an AnyStore barrier, which the x86
1939 // memory model already provides.
1940 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
1941                          CodeGeneratorX86_64* codegen) {
1942   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1943   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1944   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1945   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
1946 
1947   if (type == DataType::Type::kInt64) {
1948     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
1949   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1950     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
1951     __ movl(temp, value);
1952     __ PoisonHeapReference(temp);
1953     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1954   } else {
1955     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
1956   }
1957 
1958   if (is_volatile) {
1959     codegen->MemoryFence();
1960   }
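  // Added note: on x86-64 a plain store already has release semantics, so a volatile put only
  // needs the StoreLoad ordering supplied by MemoryFence() after the store (typically an mfence
  // or a locked read-modify-write on the stack).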
1961 
1962   if (type == DataType::Type::kReference) {
1963     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1964     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
1965                         locations->GetTemp(1).AsRegister<CpuRegister>(),
1966                         base,
1967                         value,
1968                         value_can_be_null);
1969   }
1970 }
1971 
1972 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
1973   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1974 }
1975 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
1976   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1977 }
1978 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
1979   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1980 }
1981 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
1982   GenUnsafePut(
1983       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1984 }
1985 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1986   GenUnsafePut(
1987       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1988 }
1989 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1990   GenUnsafePut(
1991       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1992 }
1993 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
1994   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1995 }
1996 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1997   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1998 }
1999 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2000   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
2001 }
2002 
2003 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2004                                        DataType::Type type,
2005                                        HInvoke* invoke) {
2006   bool can_call = kEmitCompilerReadBarrier &&
2007       kUseBakerReadBarrier &&
2008       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2009   LocationSummary* locations =
2010       new (allocator) LocationSummary(invoke,
2011                                       can_call
2012                                           ? LocationSummary::kCallOnSlowPath
2013                                           : LocationSummary::kNoCall,
2014                                       kIntrinsified);
2015   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2016   locations->SetInAt(1, Location::RequiresRegister());
2017   locations->SetInAt(2, Location::RequiresRegister());
2018   // The expected value must be in EAX/RAX.
2019   locations->SetInAt(3, Location::RegisterLocation(RAX));
2020   locations->SetInAt(4, Location::RequiresRegister());
2021 
2022   locations->SetOut(Location::RequiresRegister());
2023   if (type == DataType::Type::kReference) {
2024     // Need temporary registers for card-marking, and possibly for
2025     // (Baker) read barrier.
2026     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2027     locations->AddTemp(Location::RequiresRegister());
2028   }
2029 }
2030 
2031 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2032   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
2033 }
2034 
2035 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2036   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
2037 }
2038 
2039 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2040   // The only read barrier implementation supporting the
2041   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2042   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2043     return;
2044   }
2045 
2046   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2047 }
2048 
2049 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2050   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2051   LocationSummary* locations = invoke->GetLocations();
2052 
2053   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2054   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2055   CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
2056   // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
2057   DCHECK_EQ(expected.AsRegister(), RAX);
2058   CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2059   Location out_loc = locations->Out();
2060   CpuRegister out = out_loc.AsRegister<CpuRegister>();
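  // Added note: LOCK CMPXCHG atomically compares RAX/EAX with the memory operand; on equality
  // it stores `value` there and sets ZF, otherwise it loads the current memory value into
  // RAX/EAX and clears ZF. The setcc(kZero) below turns that flag into the boolean result.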
2061 
2062   if (type == DataType::Type::kReference) {
2063     // The only read barrier implementation supporting the
2064     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2065     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2066 
2067     CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2068     CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2069 
2070     // Mark card for object assuming new value is stored.
2071     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2072     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2073 
2074     // The address of the field within the holding object.
2075     Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2076 
2077     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2078       // Need to make sure the reference stored in the field is a to-space
2079       // one before attempting the CAS or the CAS could fail incorrectly.
2080       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2081           invoke,
2082           out_loc,  // Unused, used only as a "temporary" within the read barrier.
2083           base,
2084           field_addr,
2085           /* needs_null_check= */ false,
2086           /* always_update_field= */ true,
2087           &temp1,
2088           &temp2);
2089     }
2090 
2091     bool base_equals_value = (base.AsRegister() == value.AsRegister());
2092     Register value_reg = value.AsRegister();
2093     if (kPoisonHeapReferences) {
2094       if (base_equals_value) {
2095         // If `base` and `value` are the same register location, move
2096         // `value_reg` to a temporary register.  This way, poisoning
2097         // `value_reg` won't invalidate `base`.
2098         value_reg = temp1.AsRegister();
2099         __ movl(CpuRegister(value_reg), base);
2100       }
2101 
2102       // Check that the register allocator did not assign the location
2103       // of `expected` (RAX) to `value` nor to `base`, so that heap
2104       // poisoning (when enabled) works as intended below.
2105       // - If `value` were equal to `expected`, both references would
2106       //   be poisoned twice, meaning they would not be poisoned at
2107       //   all, as heap poisoning uses address negation.
2108       // - If `base` were equal to `expected`, poisoning `expected`
2109       //   would invalidate `base`.
2110       DCHECK_NE(value_reg, expected.AsRegister());
2111       DCHECK_NE(base.AsRegister(), expected.AsRegister());
2112 
2113       __ PoisonHeapReference(expected);
2114       __ PoisonHeapReference(CpuRegister(value_reg));
2115     }
2116 
2117     __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2118 
2119     // LOCK CMPXCHG has full barrier semantics, and we don't need
2120     // scheduling barriers at this time.
2121 
2122     // Convert ZF into the Boolean result.
2123     __ setcc(kZero, out);
2124     __ movzxb(out, out);
2125 
2126     // If heap poisoning is enabled, we need to unpoison the values
2127     // that were poisoned earlier.
2128     if (kPoisonHeapReferences) {
2129       if (base_equals_value) {
2130         // `value_reg` has been moved to a temporary register, no need
2131         // to unpoison it.
2132       } else {
2133         // Ensure `value` is different from `out`, so that unpoisoning
2134         // the former does not invalidate the latter.
2135         DCHECK_NE(value_reg, out.AsRegister());
2136         __ UnpoisonHeapReference(CpuRegister(value_reg));
2137       }
2138       // Ensure `expected` is different from `out`, so that unpoisoning
2139       // the former does not invalidate the latter.
2140       DCHECK_NE(expected.AsRegister(), out.AsRegister());
2141       __ UnpoisonHeapReference(expected);
2142     }
2143   } else {
2144     if (type == DataType::Type::kInt32) {
2145       __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2146     } else if (type == DataType::Type::kInt64) {
2147       __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
2148     } else {
2149       LOG(FATAL) << "Unexpected CAS type " << type;
2150     }
2151 
2152     // LOCK CMPXCHG has full barrier semantics, and we don't need
2153     // scheduling barriers at this time.
2154 
2155     // Convert ZF into the Boolean result.
2156     __ setcc(kZero, out);
2157     __ movzxb(out, out);
2158   }
2159 }
2160 
2161 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2162   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2163 }
2164 
2165 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2166   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2167 }
2168 
2169 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2170   // The only read barrier implementation supporting the
2171   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2172   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2173 
2174   GenCAS(DataType::Type::kReference, invoke, codegen_);
2175 }
2176 
2177 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2178   LocationSummary* locations =
2179       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2180   locations->SetInAt(0, Location::RequiresRegister());
2181   locations->SetOut(Location::SameAsFirstInput());
2182   locations->AddTemp(Location::RequiresRegister());
2183 }
2184 
2185 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2186                      X86_64Assembler* assembler) {
2187   Immediate imm_shift(shift);
2188   Immediate imm_mask(mask);
2189   __ movl(temp, reg);
2190   __ shrl(reg, imm_shift);
2191   __ andl(temp, imm_mask);
2192   __ andl(reg, imm_mask);
2193   __ shll(temp, imm_shift);
2194   __ orl(reg, temp);
2195 }
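// Illustrative example (added note): with shift == 1 and mask == 0x55555555, `temp` keeps the
// even-position bits and shifts them left by one while `reg` keeps the odd-position bits
// shifted right by one, so a single round swaps every adjacent bit pair.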
2196 
2197 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2198   X86_64Assembler* assembler = GetAssembler();
2199   LocationSummary* locations = invoke->GetLocations();
2200 
2201   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2202   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2203 
2204   /*
2205    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2206    * swapping bits to reverse bits in a number x. Using bswap to save instructions
2207    * compared to generic luni implementation which has 5 rounds of swapping bits.
2208    * x = bswap x
2209    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2210    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2211    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2212    */
2213   __ bswapl(reg);
2214   SwapBits(reg, temp, 1, 0x55555555, assembler);
2215   SwapBits(reg, temp, 2, 0x33333333, assembler);
2216   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2217 }
2218 
2219 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2220   LocationSummary* locations =
2221       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2222   locations->SetInAt(0, Location::RequiresRegister());
2223   locations->SetOut(Location::SameAsFirstInput());
2224   locations->AddTemp(Location::RequiresRegister());
2225   locations->AddTemp(Location::RequiresRegister());
2226 }
2227 
2228 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2229                        int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2230   Immediate imm_shift(shift);
2231   __ movq(temp_mask, Immediate(mask));
2232   __ movq(temp, reg);
2233   __ shrq(reg, imm_shift);
2234   __ andq(temp, temp_mask);
2235   __ andq(reg, temp_mask);
2236   __ shlq(temp, imm_shift);
2237   __ orq(reg, temp);
2238 }
2239 
2240 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2241   X86_64Assembler* assembler = GetAssembler();
2242   LocationSummary* locations = invoke->GetLocations();
2243 
2244   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2245   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2246   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2247 
2248   /*
2249    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2250    * swapping bits to reverse bits in a long number x. Using bswap to save instructions
2251    * compared to generic luni implementation which has 5 rounds of swapping bits.
2252    * x = bswap x
2253    * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2254    * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2255    * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2256    */
2257   __ bswapq(reg);
2258   SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2259   SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2260   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2261 }
2262 
2263 static void CreateBitCountLocations(
2264     ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2265   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2266     // Do nothing if there is no popcnt support. This results in generating
2267     // a call for the intrinsic rather than direct code.
2268     return;
2269   }
2270   LocationSummary* locations =
2271       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2272   locations->SetInAt(0, Location::Any());
2273   locations->SetOut(Location::RequiresRegister());
2274 }
2275 
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2276 static void GenBitCount(X86_64Assembler* assembler,
2277                         CodeGeneratorX86_64* codegen,
2278                         HInvoke* invoke,
2279                         bool is_long) {
2280   LocationSummary* locations = invoke->GetLocations();
2281   Location src = locations->InAt(0);
2282   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2283 
2284   if (invoke->InputAt(0)->IsConstant()) {
2285     // Evaluate this at compile time.
2286     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2287     int32_t result = is_long
2288         ? POPCOUNT(static_cast<uint64_t>(value))
2289         : POPCOUNT(static_cast<uint32_t>(value));
2290     codegen->Load32BitValue(out, result);
2291     return;
2292   }
2293 
2294   if (src.IsRegister()) {
2295     if (is_long) {
2296       __ popcntq(out, src.AsRegister<CpuRegister>());
2297     } else {
2298       __ popcntl(out, src.AsRegister<CpuRegister>());
2299     }
2300   } else if (is_long) {
2301     DCHECK(src.IsDoubleStackSlot());
2302     __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2303   } else {
2304     DCHECK(src.IsStackSlot());
2305     __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2306   }
2307 }
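
// Note (descriptive, not emitted code): declaring the input as Location::Any() lets popcnt
// read its operand straight from a stack slot, so no extra load is emitted when the value
// is spilled. Constant inputs are folded at compile time with the host POPCOUNT helper,
// e.g. Integer.bitCount(0b1011) simply loads 3 into the output register.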

void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());  // any will do
}

static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1ULL << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
      src.IsRegister()) {
    __ blsi(out, src.AsRegister<CpuRegister>());
  } else {
    CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
    if (is_high) {
      // Use architectural support: basically 1 << bsr.
      if (src.IsRegister()) {
        if (is_long) {
          __ bsrq(tmp, src.AsRegister<CpuRegister>());
        } else {
          __ bsrl(tmp, src.AsRegister<CpuRegister>());
        }
      } else if (is_long) {
        DCHECK(src.IsDoubleStackSlot());
        __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      } else {
        DCHECK(src.IsStackSlot());
        __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      }
      // BSR sets ZF if the input was zero.
      NearLabel is_zero, done;
      __ j(kEqual, &is_zero);
      __ movl(out, Immediate(1));  // Clears upper bits too.
      if (is_long) {
        __ shlq(out, tmp);
      } else {
        __ shll(out, tmp);
      }
      __ jmp(&done);
      __ Bind(&is_zero);
      __ xorl(out, out);  // Clears upper bits too.
      __ Bind(&done);
    } else {
      // Copy input into temporary.
      if (src.IsRegister()) {
        if (is_long) {
          __ movq(tmp, src.AsRegister<CpuRegister>());
        } else {
          __ movl(tmp, src.AsRegister<CpuRegister>());
        }
      } else if (is_long) {
        DCHECK(src.IsDoubleStackSlot());
        __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      }
      // Do the bit twiddling: basically tmp & -tmp;
      if (is_long) {
        __ movq(out, tmp);
        __ negq(tmp);
        __ andq(out, tmp);
      } else {
        __ movl(out, tmp);
        __ negl(tmp);
        __ andl(out, tmp);
      }
    }
  }
}
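
// Illustrative note (not emitted code): for a nonzero value x these intrinsics compute, in
// scalar C terms,
//
//   highestOneBit(x) == 1u << bsr(x)   // bsr = index of the most significant set bit
//   lowestOneBit(x)  == x & -x         // isolates the least significant set bit
//
// and both return 0 for x == 0. BLSI (a BMI1 instruction, gated here on the AVX2 feature
// flag) computes x & -x in a single instruction when the input is already in a register.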

void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
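
// Note (descriptive, not emitted code): BSR returns the index of the most significant set
// bit, so CLZ = (width - 1) - bsr. Because 0 <= bsr <= width - 1, subtracting from the
// all-ones value 31 (or 63) never borrows and is therefore equal to an XOR, which is the
// single `xorl` above. For example, Integer.numberOfLeadingZeros(0x00F0): bsr = 7, and
// 7 ^ 31 = 24.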

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
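
// Note (descriptive, not emitted code): BSF already yields the count of trailing zeros for a
// nonzero input (e.g. Integer.numberOfTrailingZeros(8) == bsf(0b1000) == 3), so only the
// zero input needs to be patched to 32 or 64 after the conditional jump above.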

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(RAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info =
      IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  InvokeRuntimeCallingConvention calling_convention;
  CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
  auto allocate_instance = [&]() {
    codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
  };
  if (invoke->InputAt(0)->IsIntConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the j.l.Integer in the code.
      DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      allocate_instance();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    DCHECK(locations->CanCall());
    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.length));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    DCHECK_NE(out.AsRegister(), argument.AsRegister());
    codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
    static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
                  "Check heap reference size.");
    __ movl(out, Address(argument, out, TIMES_4, 0));
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    allocate_instance();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}
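
// Note (descriptive, not emitted code): the cache bounds check above uses the standard
// unsigned-compare trick: computing `in - info.low` with `leal` and comparing it unsigned
// against `info.length` covers both `in < low` and `in >= low + length` with a single
// branch; in-range values then index directly into the boot image IntegerCache array.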

void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location obj = locations->InAt(0);
  Location out = locations->Out();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  if (kEmitCompilerReadBarrier) {
    // Check self->GetWeakRefAccessEnabled().
    ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
    __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), Immediate(0));
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  // Load the java.lang.ref.Reference class, use the output register as a temporary.
  codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);

  // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
  MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
  DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
  DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
            IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
  __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
          Immediate(0));
  __ j(kNotEqual, slow_path->GetEntryLabel());

  // Load the value from the field.
  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                    out,
                                                    obj.AsRegister<CpuRegister>(),
                                                    referent_offset,
                                                    /*needs_null_check=*/ true);
    // Note that the fence is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
  } else {
    __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
    codegen_->MaybeRecordImplicitNullCheck(invoke);
    // Note that the fence is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
    codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
  }
  __ Bind(slow_path->GetExitLabel());
}
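
// Note (descriptive, not emitted code): the fast path above bails out to the slow path (the
// original Reference.get() call) in two situations: when read barriers are emitted and the
// current thread's weak reference access is disabled, and when the Reference class's
// disableIntrinsic/slowPathEnabled fields are set (checked together with a single 16-bit
// compare, since they are adjacent bytes). Otherwise the referent is loaded like a volatile
// field.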

void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  __ movl(out, Address(obj, referent_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  // Note that the fence is a no-op, thanks to the x86-64 memory model.
  codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.

  __ cmpl(out, other);

  if (kEmitCompilerReadBarrier) {
    DCHECK(kUseBakerReadBarrier);

    NearLabel calculate_result;
    __ j(kEqual, &calculate_result);  // ZF set if taken.

    // Check if the loaded reference is null in a way that leaves ZF clear for null.
    __ cmpl(out, Immediate(1));
    __ j(kBelow, &calculate_result);  // ZF clear if taken.

    // For correct memory visibility, we need a barrier before loading the lock word, but
    // the barrier emitted for the volatile load above is already sufficient.

    // Load the lock word and check if it is a forwarding address.
    static_assert(LockWord::kStateShift == 30u);
    static_assert(LockWord::kStateForwardingAddress == 3u);
    __ movl(out, Address(out, monitor_offset));
    __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
    __ j(kBelow, &calculate_result);   // ZF clear if taken.

    // Extract the forwarding address and compare with `other`.
    __ shll(out, Immediate(LockWord::kForwardingAddressShift));
    __ cmpl(out, other);

    __ Bind(&calculate_result);
  }

  // Convert ZF into the Boolean result.
  __ setcc(kEqual, out);
  __ movzxb(out, out);
}
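
// Note (descriptive, not emitted code): the result of refersTo() is carried entirely in ZF:
// it is set by the equality compare on the fast path, left clear for a null referent, and,
// under the Baker read barrier, re-evaluated after decoding a potential forwarding address
// out of the lock word (state value 0b11 in the two most significant bits). The final
// setcc/movzxb pair materializes ZF as the boolean return value.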

void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  Address address = Address::Absolute(
      Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
  NearLabel done;
  __ gs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ gs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}
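
// Note (descriptive, not emitted code): Thread.interrupted() must clear the flag it reads.
// The code above first loads the per-thread `interrupted` field via a GS-relative absolute
// address; only when the flag is set does it write 0 back and emit a memory fence, so the
// common not-interrupted path is just a load and a test.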

void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(RAX));
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Intel uses edx:eax as the dividend.
  locations->AddTemp(Location::RegisterLocation(RDX));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location out = locations->Out();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister second_reg = second.AsRegister<CpuRegister>();

  DCHECK_EQ(RAX, first.AsRegister<Register>());
  DCHECK_EQ(RAX, out.AsRegister<Register>());
  DCHECK_EQ(RDX, rdx.AsRegister());

  // Check if the divisor is zero; if so, bail out to the managed implementation to handle it.
  __ testl(second_reg, second_reg);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ xorl(rdx, rdx);
  __ divl(second_reg);

  __ Bind(slow_path->GetExitLabel());
}
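
// Note (descriptive, not emitted code): `divl` divides the 64-bit value edx:eax by its
// 32-bit operand, leaving the quotient in eax and the remainder in edx. Zeroing rdx first
// makes the dividend the zero-extended first argument, which is exactly the unsigned
// division Integer.divideUnsigned requires; the fixed RAX input/output and the RDX temp in
// the locations above reflect these register constraints. A zero divisor takes the slow
// path, where the managed implementation handles it (throwing ArithmeticException).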

void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(RAX));
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RegisterLocation(RDX));
  locations->AddTemp(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();

  DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
  DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);

  __ imulq(y);
}
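
// Note (descriptive, not emitted code): the one-operand `imulq` computes the full signed
// 128-bit product rdx:rax = rax * src. Math.multiplyHigh returns the high 64 bits, which is
// why the locations above pin the first input to RAX and the output to RDX; the RAX temp
// marks that RAX is clobbered by the multiplication as well.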


UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals)
UNIMPLEMENTED_INTRINSIC(X86_64, LongDivideUnsigned)

UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchange)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchangeAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchangeRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAdd)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAddAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAddRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAnd)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAndAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAndRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOr)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOrAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOrRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXor)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXorAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXorRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetOpaque)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetVolatile)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetOpaque)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetVolatile)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetPlain)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetRelease)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art