/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

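  // RAX holds integer/reference return values and XMM0 holds floating-point return
  // values in the calling convention used here, so a move is only needed when the
  // intrinsic's output location is a different register.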
  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
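      // bswapl reverses all four bytes, leaving the two significant bytes of the short
      // in the upper half of the register; the arithmetic shift right by 16 then moves
      // them back down and sign-extends the result to a proper 16-bit value.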
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary.  This will avoid a
    // temporary.
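    // The masks below have every bit set except the IEEE-754 sign bit (bit 63 for
    // doubles, bit 31 for floats), so the bitwise AND clears the sign and yields the
    // absolute value without any branches.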
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

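  // Branch-free absolute value: mask = out >> (width - 1) is all ones for negative
  // inputs and zero otherwise, so (out + mask) ^ mask negates negative values and
  // leaves non-negative ones unchanged. For example, with out = -5: mask = -1,
  // out + mask = -6, and -6 ^ -1 = 5.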
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
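  // This point is only reached when the inputs compare equal. OR/AND of identical bit
  // patterns is a no-op for equal non-zero values; for the +0.0/-0.0 pair, OR keeps
  // the sign bit so min yields -0.0, while AND clears it so max yields +0.0 unless
  // both inputs are -0.0.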
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

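  // Branch-free select: after the compare, move op2 into out only when out is not
  // already the desired extreme (out > op2 for min, out < op2 for max).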
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
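  // round_mode is the SSE4.1 ROUNDSD immediate: 0 rounds to the nearest even value
  // (rint), 1 rounds toward negative infinity (floor), and 2 rounds toward positive
  // infinity (ceil), matching the callers below.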
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it (roundss with immediate 1 rounds toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it (roundsd with immediate 1 rounds toward negative infinity).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

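  // Note: the output register is SameAsFirstInput, so out aliases obj and already
  // holds the string reference when the load below indexes into it.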
  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

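  // Call the pStringCompareTo entry point; on x86-64 the quick entry points are
  // reached through a GS-relative address off the current Thread.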
  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyway.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyway. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
  }
  // As we clobber RDI during execution anyway, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it will be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
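  // repne scasw compares AX against the word at [RDI], advancing RDI by two and
  // decrementing RCX on each iteration, stopping early when a match is found (ZF set)
  // or when RCX reaches zero.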
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched.  Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
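        // movq only takes a sign-extended 32-bit immediate; the location builder used
        // RegisterOrInt32LongConstant above, so any constant that reaches here fits.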
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
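  // The java.lang.Thread peer is loaded from the current Thread object, whose fields are
  // addressed through the GS segment register on x86-64.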
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
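  // On x86-64, plain loads already provide the acquire semantics a volatile get needs,
  // so is_volatile does not change the generated code.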
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// Ordered puts need no special handling: they only require an AnyStore barrier, which the
// x86-64 memory model already provides.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
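    // The only ordering the x86-64 memory model does not guarantee after a volatile store is
    // StoreLoad; mfence provides it.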
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
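    // Record the reference store in the card table so the GC knows to rescan the holder object.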
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // cmpxchg requires the expected value to be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
    reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

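  // cmpxchg compares RAX with the memory operand: if they match it stores `value` and sets ZF,
  // otherwise it loads the current memory value into RAX and clears ZF.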
  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
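  // Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), using temp as scratch.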
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Reverse the byte order with a single bswap instruction first, then reverse the bits within
   * each byte with 3 rounds of bit swapping. Using bswap saves instructions compared to the
   * generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
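  // A 64-bit mask cannot be encoded as an immediate operand of andq (only a sign-extended imm32
  // can), so it is materialized in temp_mask first.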
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Reverse the byte order with a single bswap instruction first, then reverse the bits within
   * each byte with 3 rounds of bit swapping. Using bswap saves instructions compared to the
   * generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

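// An empty pair of visitors leaves the invoke without an intrinsified LocationSummary, so these
// intrinsics simply fall back to the regular invoke code path.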
#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art