/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

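    // The code below is, roughly:
    //   for (; src_curr_addr != src_stop_addr;
    //        src_curr_addr += element_size, dst_curr_addr += element_size) {
    //     *dst_curr_addr = ReadBarrier::Mark(*src_curr_addr);
    //   }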
    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
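      // bswapl reverses all four bytes, leaving the swapped short in the upper
      // 16 bits; the arithmetic shift moves it back down and sign-extends it.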
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary.  This will avoid a
  // temporary.
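  // Clearing the IEEE-754 sign bit yields the absolute value, so we AND with a
  // 0x7FFF... mask loaded from the constant area.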
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

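  // Branchless abs: mask = x >> 63 (or 31) is 0 for x >= 0 and -1 for x < 0,
  // and (x + mask) ^ mask == |x|.  E.g. x = -5: mask = -1, -5 + -1 = -6, -6 ^ -1 = 5.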
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
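  // At this point the operands compare equal, so only the sign bit can differ.
  // OR keeps a set sign bit if either operand is -0.0 (min(+0.0, -0.0) == -0.0),
  // while AND clears it unless both are -0.0 (max(+0.0, -0.0) == +0.0).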
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

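  // cmov keeps `out` (op1) when it is already the min/max and otherwise copies op2:
  // kGreater for min (out > op2 => take op2), kLess for max (out < op2 => take op2).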
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                      HInvoke* invoke,
                                      CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                   HInvoke* invoke,
                                   X86_64Assembler* assembler,
                                   int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
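  // E.g. in = 2.5f: floor gives 2.0f, 2.5f - 2.0f >= 0.5f, so the result is 3;
  // in = -2.5f: floor gives -3.0f, -2.5f - (-3.0f) >= 0.5f, so the result is -2.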
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
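  // Same scheme as the float case above, using the double-precision instructions
  // and clipping to kPrimLongMax.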
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
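  // (rep movsw copies RCX 16-bit words from [RSI] to [RDI], hence the fixed registers.)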
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

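// Emits range checks for one side of an arraycopy: branches to `slow_path` unless
// `pos` lies within `input`'s length and `length` elements starting at `pos` fit
// in `input`. If `length_is_input_length` is true, `length` is known to be the
// input's own length, so only `pos == 0` can succeed.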
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        Primitive::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

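  //  src_base = src + data_offset + src_pos * element_size
  //  dst_base = dst + data_offset + dst_pos * element_size
  //  src_end  = src_base + copy_length * element_size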
1144   if (src_pos.IsConstant()) {
1145     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1146     __ leal(src_base, Address(src, element_size * constant + data_offset));
1147   } else {
1148     __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
1149   }
1150 
1151   if (dst_pos.IsConstant()) {
1152     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
1153     __ leal(dst_base, Address(dst, element_size * constant + data_offset));
1154   } else {
1155     __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
1156   }
1157 
1158   if (copy_length.IsConstant()) {
1159     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
1160     __ leal(src_end, Address(src_base, element_size * constant));
1161   } else {
1162     __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
1163   }
1164 }
1165 
VisitSystemArrayCopy(HInvoke * invoke)1166 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1167   // The only read barrier implementation supporting the
1168   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1169   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1170 
1171   X86_64Assembler* assembler = GetAssembler();
1172   LocationSummary* locations = invoke->GetLocations();
1173 
1174   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1175   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1176   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1177   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1178   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1179 
1180   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
1181   Location src_pos = locations->InAt(1);
1182   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
1183   Location dest_pos = locations->InAt(3);
1184   Location length = locations->InAt(4);
1185   Location temp1_loc = locations->GetTemp(0);
1186   CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
1187   Location temp2_loc = locations->GetTemp(1);
1188   CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
1189   Location temp3_loc = locations->GetTemp(2);
1190   CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
1191   Location TMP_loc = Location::RegisterLocation(TMP);
1192 
1193   SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1194   codegen_->AddSlowPath(intrinsic_slow_path);
1195 
1196   NearLabel conditions_on_positions_validated;
1197   SystemArrayCopyOptimizations optimizations(invoke);
1198 
1199   // If source and destination are the same, we go to slow path if we need to do
1200   // forward copying.
1201   if (src_pos.IsConstant()) {
1202     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1203     if (dest_pos.IsConstant()) {
1204       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1205       if (optimizations.GetDestinationIsSource()) {
1206         // Checked when building locations.
1207         DCHECK_GE(src_pos_constant, dest_pos_constant);
1208       } else if (src_pos_constant < dest_pos_constant) {
1209         __ cmpl(src, dest);
1210         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1211       }
1212     } else {
1213       if (!optimizations.GetDestinationIsSource()) {
1214         __ cmpl(src, dest);
1215         __ j(kNotEqual, &conditions_on_positions_validated);
1216       }
1217       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
1218       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
1219     }
1220   } else {
1221     if (!optimizations.GetDestinationIsSource()) {
1222       __ cmpl(src, dest);
1223       __ j(kNotEqual, &conditions_on_positions_validated);
1224     }
1225     if (dest_pos.IsConstant()) {
1226       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1227       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
1228       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1229     } else {
1230       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
1231       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1232     }
1233   }
1234 
1235   __ Bind(&conditions_on_positions_validated);
1236 
1237   if (!optimizations.GetSourceIsNotNull()) {
1238     // Bail out if the source is null.
1239     __ testl(src, src);
1240     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1241   }
1242 
1243   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1244     // Bail out if the destination is null.
1245     __ testl(dest, dest);
1246     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1247   }
1248 
1249   // If the length is negative, bail out.
1250   // We have already checked in the LocationsBuilder for the constant case.
1251   if (!length.IsConstant() &&
1252       !optimizations.GetCountIsSourceLength() &&
1253       !optimizations.GetCountIsDestinationLength()) {
1254     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1255     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1256   }
1257 
1258   // Validity checks: source.
1259   CheckPosition(assembler,
1260                 src_pos,
1261                 src,
1262                 length,
1263                 intrinsic_slow_path,
1264                 temp1,
1265                 optimizations.GetCountIsSourceLength());
1266 
1267   // Validity checks: dest.
1268   CheckPosition(assembler,
1269                 dest_pos,
1270                 dest,
1271                 length,
1272                 intrinsic_slow_path,
1273                 temp1,
1274                 optimizations.GetCountIsDestinationLength());
1275 
1276   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1277     // Check whether all elements of the source array are assignable to the component
1278     // type of the destination array. We do two checks: either the classes are the
1279     // same, or the destination is Object[]. If neither check succeeds, we go to the
1280     // slow path.
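    // Illustrative sketch of the checks performed below (not the generated code):
    //
    //   if (dest->klass_ != src->klass_) {
    //     // Only tolerated when dest is known to be a typed Object[]: its component
    //     // type must have a null super class, i.e. be java.lang.Object.
    //     if (!dest_is_typed_object_array ||
    //         dest->klass_->component_type_->super_class_ != null) {
    //       goto intrinsic_slow_path;
    //     }
    //   }
    //   // Plus, where not statically known, checks that src and dest are non-primitive arrays.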
1281 
1282     bool did_unpoison = false;
1283     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1284       // /* HeapReference<Class> */ temp1 = dest->klass_
1285       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1286           invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
1287       // Register `temp1` is not trashed by the read barrier emitted
1288       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1289       // method produces a call to a ReadBarrierMarkRegX entry point,
1290       // which saves all potentially live registers, including
1291       // temporaries such as `temp1`.
1292       // /* HeapReference<Class> */ temp2 = src->klass_
1293       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1294           invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
1295       // If heap poisoning is enabled, `temp1` and `temp2` have been
1296       // unpoisoned by the previous calls to
1297       // GenerateFieldLoadWithBakerReadBarrier.
1298     } else {
1299       // /* HeapReference<Class> */ temp1 = dest->klass_
1300       __ movl(temp1, Address(dest, class_offset));
1301       // /* HeapReference<Class> */ temp2 = src->klass_
1302       __ movl(temp2, Address(src, class_offset));
1303       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1304           !optimizations.GetSourceIsNonPrimitiveArray()) {
1305         // One or two of the references need to be unpoisoned. Unpoison them
1306         // both to make the identity check valid.
1307         __ MaybeUnpoisonHeapReference(temp1);
1308         __ MaybeUnpoisonHeapReference(temp2);
1309         did_unpoison = true;
1310       }
1311     }
1312 
1313     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1314       // Bail out if the destination is not a non-primitive array.
1315       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1316         // /* HeapReference<Class> */ TMP = temp1->component_type_
1317         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1318             invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
1319         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1320         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1321         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1322         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1323       } else {
1324         // /* HeapReference<Class> */ TMP = temp1->component_type_
1325         __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1326         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1327         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1328         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1329       }
1330       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1331       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1332     }
1333 
1334     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1335       // Bail out if the source is not a non-primitive array.
1336       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1337         // For the same reason given earlier, `temp1` is not trashed by the
1338         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1339         // /* HeapReference<Class> */ TMP = temp2->component_type_
1340         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1341             invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
1342         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1343         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1344         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1345         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1346       } else {
1347         // /* HeapReference<Class> */ TMP = temp2->component_type_
1348         __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1349         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1350         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1351         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1352       }
1353       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1354       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1355     }
1356 
1357     __ cmpl(temp1, temp2);
1358 
1359     if (optimizations.GetDestinationIsTypedObjectArray()) {
1360       NearLabel do_copy;
1361       __ j(kEqual, &do_copy);
1362       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1363         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1364         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1365             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
1366         // We do not need to emit a read barrier for the following
1367         // heap reference load, as `temp1` is only used in a
1368         // comparison with null below, and this reference is not
1369         // kept afterwards.
1370         __ cmpl(Address(temp1, super_offset), Immediate(0));
1371       } else {
1372         if (!did_unpoison) {
1373           __ MaybeUnpoisonHeapReference(temp1);
1374         }
1375         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1376         __ movl(temp1, Address(temp1, component_offset));
1377         __ MaybeUnpoisonHeapReference(temp1);
1378         // No need to unpoison the following heap reference load, as
1379         // we're comparing against null.
1380         __ cmpl(Address(temp1, super_offset), Immediate(0));
1381       }
1382       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1383       __ Bind(&do_copy);
1384     } else {
1385       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1386     }
1387   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1388     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1389     // Bail out if the source is not a non-primitive array.
1390     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1391       // /* HeapReference<Class> */ temp1 = src->klass_
1392       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1393           invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
1394       // /* HeapReference<Class> */ TMP = temp1->component_type_
1395       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1396           invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
1397       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1398       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1399     } else {
1400       // /* HeapReference<Class> */ temp1 = src->klass_
1401       __ movl(temp1, Address(src, class_offset));
1402       __ MaybeUnpoisonHeapReference(temp1);
1403       // /* HeapReference<Class> */ TMP = temp1->component_type_
1404       __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1405       // No need to unpoison `TMP` now, as we're comparing against null.
1406       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1407       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1408       __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1409     }
1410     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1411     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1412   }
1413 
1414   const Primitive::Type type = Primitive::kPrimNot;
1415   const int32_t element_size = Primitive::ComponentSize(type);
1416 
1417   // Compute base source address, base destination address, and end
1418   // source address in `temp1`, `temp2` and `temp3` respectively.
1419   GenSystemArrayCopyAddresses(
1420       GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
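  // For reference (illustrative only), the helper above leaves:
  //   temp1 = src  + <array data offset> + src_pos  * element_size   // base source address
  //   temp2 = dest + <array data offset> + dest_pos * element_size   // base destination address
  //   temp3 = temp1 + length * element_size                          // end source address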
1421 
1422   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1423     // SystemArrayCopy implementation for Baker read barriers (see
1424     // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1425     //
1426     //   if (src_ptr != end_ptr) {
1427     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1428     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1429     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
1430     //     if (is_gray) {
1431     //       // Slow-path copy.
1432     //       do {
1433     //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1434     //       } while (src_ptr != end_ptr)
1435     //     } else {
1436     //       // Fast-path copy.
1437     //       do {
1438     //         *dest_ptr++ = *src_ptr++;
1439     //       } while (src_ptr != end_ptr)
1440     //     }
1441     //   }
1442 
1443     NearLabel loop, done;
1444 
1445     // Don't enter copy loop if `length == 0`.
1446     __ cmpl(temp1, temp3);
1447     __ j(kEqual, &done);
1448 
1449     // Given the numeric representation, it's enough to check the low bit of the rb_state.
1450     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1451     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1452     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1453     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1454     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
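    // Worked example (hypothetical numbers, for illustration only): if
    // LockWord::kReadBarrierStateShift were 28, the test below would read byte 3 of the
    // lock word (28 / 8) and check bit 4 (28 % 8), i.e. a `testb` with immediate 0x10.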
1455 
1456     // if (rb_state == ReadBarrier::GrayState())
1457     //   goto slow_path;
1458     // At this point, just do the "if" and make sure that flags are preserved until the branch.
1459     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1460 
1461     // Load fence to prevent load-load reordering.
1462     // Note that this is a no-op, thanks to the x86-64 memory model.
1463     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1464 
1465     // Slow path used to copy array when `src` is gray.
1466     SlowPathCode* read_barrier_slow_path =
1467         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1468     codegen_->AddSlowPath(read_barrier_slow_path);
1469 
1470     // We have done the "if" of the gray bit check above, now branch based on the flags.
1471     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1472 
1473     // Fast-path copy.
1474     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1475     // poison/unpoison.
1476     __ Bind(&loop);
1477     __ movl(CpuRegister(TMP), Address(temp1, 0));
1478     __ movl(Address(temp2, 0), CpuRegister(TMP));
1479     __ addl(temp1, Immediate(element_size));
1480     __ addl(temp2, Immediate(element_size));
1481     __ cmpl(temp1, temp3);
1482     __ j(kNotEqual, &loop);
1483 
1484     __ Bind(read_barrier_slow_path->GetExitLabel());
1485     __ Bind(&done);
1486   } else {
1487     // Non read barrier code.
1488 
1489     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1490     // poison/unpoison.
1491     NearLabel loop, done;
1492     __ cmpl(temp1, temp3);
1493     __ j(kEqual, &done);
1494     __ Bind(&loop);
1495     __ movl(CpuRegister(TMP), Address(temp1, 0));
1496     __ movl(Address(temp2, 0), CpuRegister(TMP));
1497     __ addl(temp1, Immediate(element_size));
1498     __ addl(temp2, Immediate(element_size));
1499     __ cmpl(temp1, temp3);
1500     __ j(kNotEqual, &loop);
1501     __ Bind(&done);
1502   }
1503 
1504   // We only need one card marking on the destination array.
1505   codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);
1506 
1507   __ Bind(intrinsic_slow_path->GetExitLabel());
1508 }
1509 
VisitStringCompareTo(HInvoke * invoke)1510 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1511   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1512                                                             LocationSummary::kCallOnMainAndSlowPath,
1513                                                             kIntrinsified);
1514   InvokeRuntimeCallingConvention calling_convention;
1515   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1516   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1517   locations->SetOut(Location::RegisterLocation(RAX));
1518 }
1519 
VisitStringCompareTo(HInvoke * invoke)1520 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1521   X86_64Assembler* assembler = GetAssembler();
1522   LocationSummary* locations = invoke->GetLocations();
1523 
1524   // Note that the null check must have been done earlier.
1525   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1526 
1527   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1528   __ testl(argument, argument);
1529   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1530   codegen_->AddSlowPath(slow_path);
1531   __ j(kEqual, slow_path->GetEntryLabel());
1532 
1533   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1534   __ Bind(slow_path->GetExitLabel());
1535 }
1536 
VisitStringEquals(HInvoke * invoke)1537 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1538   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1539                                                             LocationSummary::kNoCall,
1540                                                             kIntrinsified);
1541   locations->SetInAt(0, Location::RequiresRegister());
1542   locations->SetInAt(1, Location::RequiresRegister());
1543 
1544   // Request temporary registers: RCX and RDI are needed for the repe_cmpsq instruction.
1545   locations->AddTemp(Location::RegisterLocation(RCX));
1546   locations->AddTemp(Location::RegisterLocation(RDI));
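  // For reference (illustrative): `repe cmpsq` compares 8-byte blocks at [RSI] and [RDI],
  // advancing both pointers and decrementing RCX until RCX reaches zero or a mismatch is found.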
1547 
1548   // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1549   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1550 }
1551 
VisitStringEquals(HInvoke * invoke)1552 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1553   X86_64Assembler* assembler = GetAssembler();
1554   LocationSummary* locations = invoke->GetLocations();
1555 
1556   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1557   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1558   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1559   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1560   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1561 
1562   NearLabel end, return_true, return_false;
1563 
1564   // Get offsets of count, value, and class fields within a string object.
1565   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1566   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1567   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1568 
1569   // Note that the null check must have been done earlier.
1570   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1571 
1572   StringEqualsOptimizations optimizations(invoke);
1573   if (!optimizations.GetArgumentNotNull()) {
1574     // Check if input is null, return false if it is.
1575     __ testl(arg, arg);
1576     __ j(kEqual, &return_false);
1577   }
1578 
1579   if (!optimizations.GetArgumentIsString()) {
1580     // Instanceof check for the argument by comparing class fields.
1581     // All string objects must have the same type since String cannot be subclassed.
1582     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1583     // If the argument is a string object, its class field must be equal to receiver's class field.
1584     __ movl(rcx, Address(str, class_offset));
1585     __ cmpl(rcx, Address(arg, class_offset));
1586     __ j(kNotEqual, &return_false);
1587   }
1588 
1589   // Reference equality check, return true if same reference.
1590   __ cmpl(str, arg);
1591   __ j(kEqual, &return_true);
1592 
1593   // Load length and compression flag of receiver string.
1594   __ movl(rcx, Address(str, count_offset));
1595   // Check if the lengths and compression flags are equal; return false if they're not.
1596   // Two identical strings will always have the same compression style since
1597   // the compression style is decided at allocation time.
1598   __ cmpl(rcx, Address(arg, count_offset));
1599   __ j(kNotEqual, &return_false);
1600   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1601   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1602                 "Expecting 0=compressed, 1=uncompressed");
1603   __ jrcxz(&return_true);
1604 
1605   if (mirror::kUseStringCompression) {
1606     NearLabel string_uncompressed;
1607     // Extract the length and differentiate between both compressed or both uncompressed.
1608     // Strings with different compression styles have already been rejected above.
1609     __ shrl(rcx, Immediate(1));
1610     __ j(kCarrySet, &string_uncompressed);
1611     // Divide the string length by 2, rounding up, and continue as if uncompressed.
1612     // The +1 rounds up so that an odd number of bytes still gets fully compared.
1613     __ addl(rcx, Immediate(1));
1614     __ shrl(rcx, Immediate(1));
1615     __ Bind(&string_uncompressed);
1616   }
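  // Worked example (illustrative, assuming count = (length << 1) | compression_flag as
  // implied by the shifts above): two compressed 5-byte strings have count = 10; the
  // `shrl` yields 5 with CF clear, and (5 + 1) >> 1 = 3, so the division by 4 below
  // issues a single 8-byte compare covering the data plus zero padding.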
1617   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1618   __ leal(rsi, Address(str, value_offset));
1619   __ leal(rdi, Address(arg, value_offset));
1620 
1621   // Divide string length by 4 and adjust for lengths not divisible by 4.
1622   __ addl(rcx, Immediate(3));
1623   __ shrl(rcx, Immediate(2));
1624 
1625   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1626   // or 8 characters (compressed) at a time.
1627   DCHECK_ALIGNED(value_offset, 8);
1628   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
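  // E.g. (illustrative): an uncompressed length of 5 chars gives (5 + 3) >> 2 = 2 qword
  // compares; the assertions above are what make reading past the last character safe
  // (the string data is zero padded up to the 8-byte boundary).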
1629 
1630   // Loop to compare strings four characters at a time starting at the beginning of the string.
1631   __ repe_cmpsq();
1632   // If strings are not equal, zero flag will be cleared.
1633   __ j(kNotEqual, &return_false);
1634 
1635   // Return true and exit the function.
1636   // If loop does not result in returning false, we return true.
1637   __ Bind(&return_true);
1638   __ movl(rsi, Immediate(1));
1639   __ jmp(&end);
1640 
1641   // Return false and exit the function.
1642   __ Bind(&return_false);
1643   __ xorl(rsi, rsi);
1644   __ Bind(&end);
1645 }
1646 
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1647 static void CreateStringIndexOfLocations(HInvoke* invoke,
1648                                          ArenaAllocator* allocator,
1649                                          bool start_at_zero) {
1650   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1651                                                                LocationSummary::kCallOnSlowPath,
1652                                                                kIntrinsified);
1653   // The data needs to be in RDI for scasw. So request that the string is placed there anyway.
1654   locations->SetInAt(0, Location::RegisterLocation(RDI));
1655   // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1656   // allocator to do that anyway. We can still do the constant check by checking the parameter
1657   // of the instruction explicitly.
1658   // Note: This works as we don't clobber RAX anywhere.
1659   locations->SetInAt(1, Location::RegisterLocation(RAX));
1660   if (!start_at_zero) {
1661     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1662   }
1663   // As we clobber RDI during execution anyway, also use it as the output.
1664   locations->SetOut(Location::SameAsFirstInput());
1665 
1666   // repne scasw uses RCX as the counter.
1667   locations->AddTemp(Location::RegisterLocation(RCX));
1668   // Need another temporary to be able to compute the result.
1669   locations->AddTemp(Location::RequiresRegister());
1670 }
1671 
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,ArenaAllocator * allocator,bool start_at_zero)1672 static void GenerateStringIndexOf(HInvoke* invoke,
1673                                   X86_64Assembler* assembler,
1674                                   CodeGeneratorX86_64* codegen,
1675                                   ArenaAllocator* allocator,
1676                                   bool start_at_zero) {
1677   LocationSummary* locations = invoke->GetLocations();
1678 
1679   // Note that the null check must have been done earlier.
1680   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1681 
1682   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1683   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1684   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1685   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1686   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1687 
1688   // Check our assumptions for registers.
1689   DCHECK_EQ(string_obj.AsRegister(), RDI);
1690   DCHECK_EQ(search_value.AsRegister(), RAX);
1691   DCHECK_EQ(counter.AsRegister(), RCX);
1692   DCHECK_EQ(out.AsRegister(), RDI);
1693 
1694   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1695   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1696   SlowPathCode* slow_path = nullptr;
1697   HInstruction* code_point = invoke->InputAt(1);
1698   if (code_point->IsIntConstant()) {
1699     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1700         std::numeric_limits<uint16_t>::max()) {
1701       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1702       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1703       slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1704       codegen->AddSlowPath(slow_path);
1705       __ jmp(slow_path->GetEntryLabel());
1706       __ Bind(slow_path->GetExitLabel());
1707       return;
1708     }
1709   } else if (code_point->GetType() != Primitive::kPrimChar) {
1710     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1711     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1712     codegen->AddSlowPath(slow_path);
1713     __ j(kAbove, slow_path->GetEntryLabel());
1714   }
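  // Conceptually (illustrative sketch, not generated code):
  //   if (code_point > 0xFFFF) goto slow_path;  // A supplementary code point would need a
  //                                             // surrogate-pair search, which repne scas cannot do.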
1715 
1716   // From here down, we know that we are looking for a char that fits in
1717   // 16 bits (uncompressed) or 8 bits (compressed).
1718   // Location of reference to data array within the String object.
1719   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1720   // Location of count within the String object.
1721   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1722 
1723   // Load the count field of the string containing the length and compression flag.
1724   __ movl(string_length, Address(string_obj, count_offset));
1725 
1726   // Do a zero-length check. Even with string compression `count == 0` means empty.
1727   // TODO: Support jecxz.
1728   NearLabel not_found_label;
1729   __ testl(string_length, string_length);
1730   __ j(kEqual, &not_found_label);
1731 
1732   if (mirror::kUseStringCompression) {
1733     // Use TMP to keep the original count (string length with the compression flag).
1734     __ movl(CpuRegister(TMP), string_length);
1735     // Mask out first bit used as compression flag.
1736     __ shrl(string_length, Immediate(1));
1737   }
1738 
1739   if (start_at_zero) {
1740     // Number of chars to scan is the same as the string length.
1741     __ movl(counter, string_length);
1742     // Move to the start of the string.
1743     __ addq(string_obj, Immediate(value_offset));
1744   } else {
1745     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1746 
1747     // Do a start_index check.
1748     __ cmpl(start_index, string_length);
1749     __ j(kGreaterEqual, &not_found_label);
1750 
1751     // Ensure we have a start index >= 0.
1752     __ xorl(counter, counter);
1753     __ cmpl(start_index, Immediate(0));
1754     __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
1755 
1756     if (mirror::kUseStringCompression) {
1757       NearLabel modify_counter, offset_uncompressed_label;
1758       __ testl(CpuRegister(TMP), Immediate(1));
1759       __ j(kNotZero, &offset_uncompressed_label);
1760       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1761       __ jmp(&modify_counter);
1762       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1763       __ Bind(&offset_uncompressed_label);
1764       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1765       __ Bind(&modify_counter);
1766     } else {
1767       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1768     }
1769     // Now update the work counter (RCX): it will be string.length - start_index.
1770     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1771     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
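    // Illustrative summary: after the negate-and-add above the work counter equals
    // string_length - max(start_index, 0), and string_obj points at the first
    // character (or byte, if compressed) to scan.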
1772   }
1773 
1774   if (mirror::kUseStringCompression) {
1775     NearLabel uncompressed_string_comparison;
1776     NearLabel comparison_done;
1777     __ testl(CpuRegister(TMP), Immediate(1));
1778     __ j(kNotZero, &uncompressed_string_comparison);
1779     // Check if RAX (search_value) is ASCII.
1780     __ cmpl(search_value, Immediate(127));
1781     __ j(kGreater, &not_found_label);
1782     // Comparing byte-per-byte.
1783     __ repne_scasb();
1784     __ jmp(&comparison_done);
1785     // Everything is set up for repne scasw:
1786     //   * Comparison address in RDI.
1787     //   * Counter in ECX.
1788     __ Bind(&uncompressed_string_comparison);
1789     __ repne_scasw();
1790     __ Bind(&comparison_done);
1791   } else {
1792     __ repne_scasw();
1793   }
1794   // Did we find a match?
1795   __ j(kNotEqual, &not_found_label);
1796 
1797   // Yes, we matched.  Compute the index of the result.
1798   __ subl(string_length, counter);
1799   __ leal(out, Address(string_length, -1));
1800 
1801   NearLabel done;
1802   __ jmp(&done);
1803 
1804   // Failed to match; return -1.
1805   __ Bind(&not_found_label);
1806   __ movl(out, Immediate(-1));
1807 
1808   // And join up at the end.
1809   __ Bind(&done);
1810   if (slow_path != nullptr) {
1811     __ Bind(slow_path->GetExitLabel());
1812   }
1813 }
1814 
VisitStringIndexOf(HInvoke * invoke)1815 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1816   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1817 }
1818 
VisitStringIndexOf(HInvoke * invoke)1819 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1820   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1821 }
1822 
VisitStringIndexOfAfter(HInvoke * invoke)1823 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1824   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1825 }
1826 
VisitStringIndexOfAfter(HInvoke * invoke)1827 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1828   GenerateStringIndexOf(
1829       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1830 }
1831 
VisitStringNewStringFromBytes(HInvoke * invoke)1832 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1833   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1834                                                             LocationSummary::kCallOnMainAndSlowPath,
1835                                                             kIntrinsified);
1836   InvokeRuntimeCallingConvention calling_convention;
1837   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1838   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1839   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1840   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1841   locations->SetOut(Location::RegisterLocation(RAX));
1842 }
1843 
VisitStringNewStringFromBytes(HInvoke * invoke)1844 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1845   X86_64Assembler* assembler = GetAssembler();
1846   LocationSummary* locations = invoke->GetLocations();
1847 
1848   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1849   __ testl(byte_array, byte_array);
1850   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1851   codegen_->AddSlowPath(slow_path);
1852   __ j(kEqual, slow_path->GetEntryLabel());
1853 
1854   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1855   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1856   __ Bind(slow_path->GetExitLabel());
1857 }
1858 
VisitStringNewStringFromChars(HInvoke * invoke)1859 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1860   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1861                                                             LocationSummary::kCallOnMainOnly,
1862                                                             kIntrinsified);
1863   InvokeRuntimeCallingConvention calling_convention;
1864   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1865   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1866   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1867   locations->SetOut(Location::RegisterLocation(RAX));
1868 }
1869 
VisitStringNewStringFromChars(HInvoke * invoke)1870 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1871   // No need to emit code checking whether `locations->InAt(2)` is a null
1872   // pointer, as callers of the native method
1873   //
1874   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1875   //
1876   // all include a null check on `data` before calling that method.
1877   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1878   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1879 }
1880 
VisitStringNewStringFromString(HInvoke * invoke)1881 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1882   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1883                                                             LocationSummary::kCallOnMainAndSlowPath,
1884                                                             kIntrinsified);
1885   InvokeRuntimeCallingConvention calling_convention;
1886   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1887   locations->SetOut(Location::RegisterLocation(RAX));
1888 }
1889 
VisitStringNewStringFromString(HInvoke * invoke)1890 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1891   X86_64Assembler* assembler = GetAssembler();
1892   LocationSummary* locations = invoke->GetLocations();
1893 
1894   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1895   __ testl(string_to_copy, string_to_copy);
1896   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1897   codegen_->AddSlowPath(slow_path);
1898   __ j(kEqual, slow_path->GetEntryLabel());
1899 
1900   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1901   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1902   __ Bind(slow_path->GetExitLabel());
1903 }
1904 
VisitStringGetCharsNoCheck(HInvoke * invoke)1905 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1906   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1907   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1908                                                             LocationSummary::kNoCall,
1909                                                             kIntrinsified);
1910   locations->SetInAt(0, Location::RequiresRegister());
1911   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1912   locations->SetInAt(2, Location::RequiresRegister());
1913   locations->SetInAt(3, Location::RequiresRegister());
1914   locations->SetInAt(4, Location::RequiresRegister());
1915 
1916   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1917   locations->AddTemp(Location::RegisterLocation(RSI));
1918   locations->AddTemp(Location::RegisterLocation(RDI));
1919   locations->AddTemp(Location::RegisterLocation(RCX));
1920 }
1921 
VisitStringGetCharsNoCheck(HInvoke * invoke)1922 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1923   X86_64Assembler* assembler = GetAssembler();
1924   LocationSummary* locations = invoke->GetLocations();
1925 
1926   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1927   // Location of data in char array buffer.
1928   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1929   // Location of char array data in string.
1930   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1931 
1932   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1933   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1934   Location srcBegin = locations->InAt(1);
1935   int srcBegin_value =
1936     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1937   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1938   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1939   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1940 
1941   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1942   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1943   DCHECK_EQ(char_size, 2u);
1944 
1945   NearLabel done;
1946   // Compute the number of chars (words) to move.
1947   __ movl(CpuRegister(RCX), srcEnd);
1948   if (srcBegin.IsConstant()) {
1949     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1950   } else {
1951     DCHECK(srcBegin.IsRegister());
1952     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1953   }
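  // At this point RCX holds srcEnd - srcBegin, i.e. the number of characters to copy.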
1954   if (mirror::kUseStringCompression) {
1955     NearLabel copy_uncompressed, copy_loop;
1956     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1957     DCHECK_EQ(c_char_size, 1u);
1958     // Location of count in string.
1959     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1960 
1961     __ testl(Address(obj, count_offset), Immediate(1));
1962     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1963                   "Expecting 0=compressed, 1=uncompressed");
1964     __ j(kNotZero, &copy_uncompressed);
1965     // Compute the address of the source string by adding the number of chars from
1966     // the source beginning to the value offset of a string.
1967     __ leaq(CpuRegister(RSI),
1968             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1969     // Compute the address of the destination char array buffer before entering the copy loop.
1970     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1971 
1972     __ Bind(&copy_loop);
1973     __ jrcxz(&done);
1974     // Use TMP as temporary (convert byte from RSI to word).
1975     // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1976     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1977     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1978     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1979     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1980     // TODO: Add support for LOOP to X86_64Assembler.
1981     __ subl(CpuRegister(RCX), Immediate(1));
1982     __ jmp(&copy_loop);
1983 
1984     __ Bind(&copy_uncompressed);
1985   }
1986 
1987   __ leaq(CpuRegister(RSI),
1988           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1989   // Compute the address of the destination buffer.
1990   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1991   // Do the move.
1992   __ rep_movsw();
1993 
1994   __ Bind(&done);
1995 }
1996 
GenPeek(LocationSummary * locations,Primitive::Type size,X86_64Assembler * assembler)1997 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1998   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1999   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
2000   // x86 allows unaligned access. We do not have to check the input or use specific instructions
2001   // to avoid a SIGBUS.
2002   switch (size) {
2003     case Primitive::kPrimByte:
2004       __ movsxb(out, Address(address, 0));
2005       break;
2006     case Primitive::kPrimShort:
2007       __ movsxw(out, Address(address, 0));
2008       break;
2009     case Primitive::kPrimInt:
2010       __ movl(out, Address(address, 0));
2011       break;
2012     case Primitive::kPrimLong:
2013       __ movq(out, Address(address, 0));
2014       break;
2015     default:
2016       LOG(FATAL) << "Type not recognized for peek: " << size;
2017       UNREACHABLE();
2018   }
2019 }
2020 
VisitMemoryPeekByte(HInvoke * invoke)2021 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
2022   CreateIntToIntLocations(arena_, invoke);
2023 }
2024 
VisitMemoryPeekByte(HInvoke * invoke)2025 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
2026   GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
2027 }
2028 
VisitMemoryPeekIntNative(HInvoke * invoke)2029 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
2030   CreateIntToIntLocations(arena_, invoke);
2031 }
2032 
VisitMemoryPeekIntNative(HInvoke * invoke)2033 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
2034   GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
2035 }
2036 
VisitMemoryPeekLongNative(HInvoke * invoke)2037 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
2038   CreateIntToIntLocations(arena_, invoke);
2039 }
2040 
VisitMemoryPeekLongNative(HInvoke * invoke)2041 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
2042   GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
2043 }
2044 
VisitMemoryPeekShortNative(HInvoke * invoke)2045 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
2046   CreateIntToIntLocations(arena_, invoke);
2047 }
2048 
VisitMemoryPeekShortNative(HInvoke * invoke)2049 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
2050   GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
2051 }
2052 
CreateIntIntToVoidLocations(ArenaAllocator * arena,HInvoke * invoke)2053 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
2054   LocationSummary* locations = new (arena) LocationSummary(invoke,
2055                                                            LocationSummary::kNoCall,
2056                                                            kIntrinsified);
2057   locations->SetInAt(0, Location::RequiresRegister());
2058   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
2059 }
2060 
GenPoke(LocationSummary * locations,Primitive::Type size,X86_64Assembler * assembler)2061 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
2062   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
2063   Location value = locations->InAt(1);
2064   // x86 allows unaligned access. We do not have to check the input or use specific instructions
2065   // to avoid a SIGBUS.
2066   switch (size) {
2067     case Primitive::kPrimByte:
2068       if (value.IsConstant()) {
2069         __ movb(Address(address, 0),
2070                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2071       } else {
2072         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
2073       }
2074       break;
2075     case Primitive::kPrimShort:
2076       if (value.IsConstant()) {
2077         __ movw(Address(address, 0),
2078                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2079       } else {
2080         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
2081       }
2082       break;
2083     case Primitive::kPrimInt:
2084       if (value.IsConstant()) {
2085         __ movl(Address(address, 0),
2086                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2087       } else {
2088         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
2089       }
2090       break;
2091     case Primitive::kPrimLong:
2092       if (value.IsConstant()) {
2093         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
2094         DCHECK(IsInt<32>(v));
2095         int32_t v_32 = v;
2096         __ movq(Address(address, 0), Immediate(v_32));
2097       } else {
2098         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
2099       }
2100       break;
2101     default:
2102       LOG(FATAL) << "Type not recognized for poke: " << size;
2103       UNREACHABLE();
2104   }
2105 }
2106 
VisitMemoryPokeByte(HInvoke * invoke)2107 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
2108   CreateIntIntToVoidLocations(arena_, invoke);
2109 }
2110 
VisitMemoryPokeByte(HInvoke * invoke)2111 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
2112   GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
2113 }
2114 
VisitMemoryPokeIntNative(HInvoke * invoke)2115 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
2116   CreateIntIntToVoidLocations(arena_, invoke);
2117 }
2118 
VisitMemoryPokeIntNative(HInvoke * invoke)2119 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
2120   GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
2121 }
2122 
VisitMemoryPokeLongNative(HInvoke * invoke)2123 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
2124   CreateIntIntToVoidLocations(arena_, invoke);
2125 }
2126 
VisitMemoryPokeLongNative(HInvoke * invoke)2127 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
2128   GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
2129 }
2130 
VisitMemoryPokeShortNative(HInvoke * invoke)2131 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
2132   CreateIntIntToVoidLocations(arena_, invoke);
2133 }
2134 
VisitMemoryPokeShortNative(HInvoke * invoke)2135 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
2136   GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
2137 }
2138 
VisitThreadCurrentThread(HInvoke * invoke)2139 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
2140   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2141                                                             LocationSummary::kNoCall,
2142                                                             kIntrinsified);
2143   locations->SetOut(Location::RequiresRegister());
2144 }
2145 
VisitThreadCurrentThread(HInvoke * invoke)2146 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
2147   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
2148   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
2149                                                     /* no_rip */ true));
2150 }
2151 
GenUnsafeGet(HInvoke * invoke,Primitive::Type type,bool is_volatile ATTRIBUTE_UNUSED,CodeGeneratorX86_64 * codegen)2152 static void GenUnsafeGet(HInvoke* invoke,
2153                          Primitive::Type type,
2154                          bool is_volatile ATTRIBUTE_UNUSED,
2155                          CodeGeneratorX86_64* codegen) {
2156   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2157   LocationSummary* locations = invoke->GetLocations();
2158   Location base_loc = locations->InAt(1);
2159   CpuRegister base = base_loc.AsRegister<CpuRegister>();
2160   Location offset_loc = locations->InAt(2);
2161   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
2162   Location output_loc = locations->Out();
2163   CpuRegister output = output_loc.AsRegister<CpuRegister>();
2164 
2165   switch (type) {
2166     case Primitive::kPrimInt:
2167       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2168       break;
2169 
2170     case Primitive::kPrimNot: {
2171       if (kEmitCompilerReadBarrier) {
2172         if (kUseBakerReadBarrier) {
2173           Address src(base, offset, ScaleFactor::TIMES_1, 0);
2174           codegen->GenerateReferenceLoadWithBakerReadBarrier(
2175               invoke, output_loc, base, src, /* needs_null_check */ false);
2176         } else {
2177           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2178           codegen->GenerateReadBarrierSlow(
2179               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2180         }
2181       } else {
2182         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2183         __ MaybeUnpoisonHeapReference(output);
2184       }
2185       break;
2186     }
2187 
2188     case Primitive::kPrimLong:
2189       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2190       break;
2191 
2192     default:
2193       LOG(FATAL) << "Unsupported op size " << type;
2194       UNREACHABLE();
2195   }
2196 }
2197 
CreateIntIntIntToIntLocations(ArenaAllocator * arena,HInvoke * invoke)2198 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
2199   bool can_call = kEmitCompilerReadBarrier &&
2200       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2201        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2202   LocationSummary* locations = new (arena) LocationSummary(invoke,
2203                                                            (can_call
2204                                                                 ? LocationSummary::kCallOnSlowPath
2205                                                                 : LocationSummary::kNoCall),
2206                                                            kIntrinsified);
2207   if (can_call && kUseBakerReadBarrier) {
2208     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2209   }
2210   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2211   locations->SetInAt(1, Location::RequiresRegister());
2212   locations->SetInAt(2, Location::RequiresRegister());
2213   locations->SetOut(Location::RequiresRegister(),
2214                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2215 }
2216 
VisitUnsafeGet(HInvoke * invoke)2217 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
2218   CreateIntIntIntToIntLocations(arena_, invoke);
2219 }
VisitUnsafeGetVolatile(HInvoke * invoke)2220 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2221   CreateIntIntIntToIntLocations(arena_, invoke);
2222 }
VisitUnsafeGetLong(HInvoke * invoke)2223 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2224   CreateIntIntIntToIntLocations(arena_, invoke);
2225 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2226 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2227   CreateIntIntIntToIntLocations(arena_, invoke);
2228 }
VisitUnsafeGetObject(HInvoke * invoke)2229 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2230   CreateIntIntIntToIntLocations(arena_, invoke);
2231 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2232 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2233   CreateIntIntIntToIntLocations(arena_, invoke);
2234 }
2235 
2236 
VisitUnsafeGet(HInvoke * invoke)2237 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2238   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2239 }
VisitUnsafeGetVolatile(HInvoke * invoke)2240 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2241   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2242 }
VisitUnsafeGetLong(HInvoke * invoke)2243 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2244   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2245 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2246 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2247   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2248 }
VisitUnsafeGetObject(HInvoke * invoke)2249 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2250   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2251 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2252 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2253   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2254 }
2255 
2256 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke)2257 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2258                                                        Primitive::Type type,
2259                                                        HInvoke* invoke) {
2260   LocationSummary* locations = new (arena) LocationSummary(invoke,
2261                                                            LocationSummary::kNoCall,
2262                                                            kIntrinsified);
2263   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2264   locations->SetInAt(1, Location::RequiresRegister());
2265   locations->SetInAt(2, Location::RequiresRegister());
2266   locations->SetInAt(3, Location::RequiresRegister());
2267   if (type == Primitive::kPrimNot) {
2268     // Need temp registers for card-marking.
2269     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2270     locations->AddTemp(Location::RequiresRegister());
2271   }
2272 }
2273 
VisitUnsafePut(HInvoke * invoke)2274 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2275   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2276 }
VisitUnsafePutOrdered(HInvoke * invoke)2277 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2278   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2279 }
VisitUnsafePutVolatile(HInvoke * invoke)2280 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2281   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2282 }
VisitUnsafePutObject(HInvoke * invoke)2283 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2284   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2285 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2286 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2287   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2288 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2289 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2290   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2291 }
VisitUnsafePutLong(HInvoke * invoke)2292 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2293   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2294 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2295 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2296   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2297 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2298 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2299   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2300 }
2301 
2302 // We do not need special handling for ordered puts: ordered only requires an AnyStore barrier,
2303 // which the x86-64 memory model already provides.
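// Illustrative mapping (not generated code):
//   putXxx / putOrderedXxx  -> plain MOV store (the AnyStore barrier is implicit on x86-64).
//   putXxxVolatile          -> MOV store followed by codegen->MemoryFence().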
GenUnsafePut(LocationSummary * locations,Primitive::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2304 static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
2305                          CodeGeneratorX86_64* codegen) {
2306   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2307   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2308   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2309   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2310 
2311   if (type == Primitive::kPrimLong) {
2312     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2313   } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2314     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2315     __ movl(temp, value);
2316     __ PoisonHeapReference(temp);
2317     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2318   } else {
2319     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2320   }
2321 
2322   if (is_volatile) {
2323     codegen->MemoryFence();
2324   }
2325 
2326   if (type == Primitive::kPrimNot) {
2327     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2328     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2329                         locations->GetTemp(1).AsRegister<CpuRegister>(),
2330                         base,
2331                         value,
2332                         value_can_be_null);
2333   }
2334 }
2335 
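// Illustration only (not part of the emitted code): for an object field put, GenUnsafePut above
// emits a sequence roughly equivalent to this C++-flavored sketch, where Heap32At() is a
// hypothetical helper naming the 32-bit slot at `base + offset`:
//
//   Heap32At(base, offset) = MaybePoison(value);  // movl (movq for kPrimLong, no poisoning)
//   if (is_volatile) {
//     MemoryFence();                              // StoreLoad barrier after the store
//   }
//   if (type == kPrimNot) {
//     MarkGCCard(base, value);                    // dirty the card of the holder object
//   }
//
// The plain and ordered variants differ from the volatile one only by the missing fence.
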
void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
                                       Primitive::Type type,
                                       HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX, as required by the CMPXCHG instruction.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temporary registers for card-marking, and possibly for
    // (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  Location out_loc = locations->Out();
  CpuRegister out = out_loc.AsRegister<CpuRegister>();

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);

    // The address of the field within the holding object.
    Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // The output is only used as a "temporary" within the read barrier.
          base,
          field_addr,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &temp1,
          &temp2);
    }

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = value.AsRegister();
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register.  This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr, CpuRegister(value_reg));

    // LOCK CMPXCHG has full barrier semantics, and we don't need
    // scheduling barriers at this time.

    // Convert ZF into the Boolean result.
    __ setcc(kZero, out);
    __ movzxb(out, out);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        // Ensure `value` is different from `out`, so that unpoisoning
        // the former does not invalidate the latter.
        DCHECK_NE(value_reg, out.AsRegister());
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // Ensure `expected` is different from `out`, so that unpoisoning
      // the former does not invalidate the latter.
      DCHECK_NE(expected.AsRegister(), out.AsRegister());
      __ UnpoisonHeapReference(expected);
    }
  } else {
    if (type == Primitive::kPrimInt) {
      __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
    } else if (type == Primitive::kPrimLong) {
      __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
    } else {
      LOG(FATAL) << "Unexpected CAS type " << type;
    }

    // LOCK CMPXCHG has full barrier semantics, and we don't need
    // scheduling barriers at this time.

    // Convert ZF into the Boolean result.
    __ setcc(kZero, out);
    __ movzxb(out, out);
  }
}

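// Illustration only: ignoring read barriers and heap poisoning, LOCK CMPXCHG gives the sequence
// emitted by GenCAS the semantics of the following C++ sketch, where `field` is a hypothetical
// name for the 32- or 64-bit slot at `base + offset`:
//
//   bool success;
//   if (field == expected) {   // compared against RAX
//     field = value;           // performed atomically under the LOCK prefix
//     success = true;          // ZF set
//   } else {
//     expected = field;        // RAX receives the current value (unused by this intrinsic)
//     success = false;         // ZF clear
//   }
//   out = success;             // setcc kZero + movzxb
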
void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

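// Illustration only: each SwapBits call rewrites the register as
//
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
//
// With shift == 1 and mask == 0x55555555 this swaps adjacent bits. As a rough example on a
// single byte (mask 0x55): 0b01101100 becomes 0b10011100, each 2-bit pair swapped in place.
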
void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse the bits in a number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse the bits in a long number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

static void CreateBitCountLocations(
    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen,
                        HInvoke* invoke,
                        bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  if (src.IsRegister()) {
    if (is_long) {
      __ popcntq(out, src.AsRegister<CpuRegister>());
    } else {
      __ popcntl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }
}

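// Illustration only: a single popcntl computes the same value as the classic 32-bit SWAR
// population count, shown here as a reference sketch (this is not the code the fallback call
// executes, just one well-known equivalent formulation):
//
//   uint32_t BitCount32(uint32_t x) {
//     x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
//     x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
//     x = (x + (x >> 4)) & 0x0f0f0f0fu;                  // 8-bit partial sums
//     return (x * 0x01010101u) >> 24;                    // add the four byte sums
//   }
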
void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());  // any will do
}

static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1ULL << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
  if (is_high) {
    // Use architectural support: basically 1 << bsr.
    if (src.IsRegister()) {
      if (is_long) {
        __ bsrq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ bsrl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // BSR sets ZF if the input was zero.
    NearLabel is_zero, done;
    __ j(kEqual, &is_zero);
    __ movl(out, Immediate(1));  // Clears upper bits too.
    if (is_long) {
      __ shlq(out, tmp);
    } else {
      __ shll(out, tmp);
    }
    __ jmp(&done);
    __ Bind(&is_zero);
    __ xorl(out, out);  // Clears upper bits too.
    __ Bind(&done);
  } else {
    // Copy input into temporary.
    if (src.IsRegister()) {
      if (is_long) {
        __ movq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ movl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // Do the bit twiddling: basically tmp & -tmp;
    if (is_long) {
      __ movq(out, tmp);
      __ negq(tmp);
      __ andq(out, tmp);
    } else {
      __ movl(out, tmp);
      __ negl(tmp);
      __ andl(out, tmp);
    }
  }
}

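// Illustration only: for a nonzero 32-bit x, the sequences in GenOneBit compute
//
//   highestOneBit(x) == 1u << (31 - clz(x))   // 1 shifted left by the BSR result
//   lowestOneBit(x)  == x & -x                // isolate the least significant set bit
//
// and both yield 0 for x == 0. For example, x == 0b00010100 gives 0b00010000 and 0b00000100,
// respectively. The 64-bit variants are analogous.
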
void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

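// Illustration only: for a nonzero input, BSR returns the bit index of the most significant set
// bit, so the number of leading zeros is
//
//   clz(x) == (width - 1) - bsr(x)
//
// Because bsr(x) <= width - 1, the subtraction is done with the single XOR above:
// bsr(x) ^ (width - 1) == (width - 1) - bsr(x). For example, for x == 0x800 (bit 11 set),
// bsrl yields 11 and 11 ^ 31 == 20, the expected Integer.numberOfLeadingZeros result.
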
void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

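// Illustration only: for a nonzero input, BSF already returns the number of trailing zeros (the
// bit index of the least significant set bit), so no correction is needed; only the zero case
// has to be patched to 32 or 64. For example, bsfl on 0x28 (bits 3 and 5 set) yields 3, which
// is exactly Integer.numberOfTrailingZeros(0x28).
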
void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0));
  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // Now get declaring class.
  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check the static flags that prevent us from using the intrinsic.
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Fast path.
  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}

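// Illustration only: with read barriers disabled, the generated Reference.get() code behaves
// roughly like the following sketch, where `disable_intrinsic` and `slow_path_enabled` are
// placeholder names for the byte-sized flags located at GetReferenceDisableFlagOffset() and
// GetReferenceSlowFlagOffset() in the declaring class:
//
//   if (disable_intrinsic || slow_path_enabled) {
//     return Reference.get(ref);     // take the slow-path call
//   }
//   return ref->referent_;           // plain heap load (+ unpoison if needed)
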
void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(RAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(static_cast<int32_t>(address)));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(argument, Immediate(static_cast<int32_t>(address)));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
      __ movl(out, Address(out, TIMES_4, data_offset + address));
    } else {
      CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
      __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
      __ movl(out, Address(temp, out, TIMES_4, 0));
    }
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(argument, Immediate(static_cast<int32_t>(address)));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

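// Illustration only: the Integer.valueOf() code generated above follows the usual boxing-cache
// contract, roughly (C++-flavored pseudo-code, with `cache`, `low` and `high` taken from
// IntegerValueOfInfo and AllocInteger() standing in for the runtime allocation entrypoint):
//
//   if (value >= low && value <= high) {
//     return cache[value - low];      // boot-image object; its address is embedded in the code
//   }
//   Integer* boxed = AllocInteger();  // runtime call
//   boxed->value = value;
//   return boxed;
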
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art