/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : arena_(codegen->GetGraph()->GetArena()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86 slow path.  Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86 for HInvokeVirtual).  So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
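    // Logical right shift by 32 brings the high word into the low lane for
    // the next movd.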
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
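    // punpckldq interleaves the low 32-bit lanes: temp1[31:0] keeps the low
    // word and temp1[63:32] receives the high word.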
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
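      // bswapl reverses all four bytes, leaving the swapped short in the
      // upper half; the arithmetic shift moves it down and sign-extends.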
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
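  // Reversing 8 bytes amounts to swapping the two 4-byte halves and then
  // byte-reversing each half with bswapl.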
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
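    // In both cases the subl plus push(es) adjust ESP by 16 bytes in total,
    // preserving 16-byte alignment (assumed on entry) so that andpd/andps
    // can use the stack slot as a memory operand.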
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

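  // Branchless abs: with sign = x >> 31 (all ones if x < 0, zero otherwise),
  // abs(x) == (x ^ sign) - sign.
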
  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

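  // Same branchless idiom as the 32-bit case, extended to a register pair:
  // sbbl propagates the borrow from the low word into the high word.
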
  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

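  // ucomisd/ucomiss set the parity flag on an unordered comparison, i.e.
  // when either operand is NaN.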
  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(),
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

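    // The subtraction result itself is discarded; only the flags it produces
    // are consumed by the cmovl instructions below.
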
    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

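// roundsd immediate encodings used by the visitors below: 0 = round to
// nearest (rint), 1 = round toward negative infinity (floor),
// 2 = round toward positive infinity (ceil).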
void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
// as it needs 64 bit instructions.
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
  if (!kRoundIsPlusPointFive) {
    return;
  }

  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor to an integer (round mode 1 = toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // If inPlusPointFive >= maxInt, the result saturates to kPrimIntMax, which
  // is already in out, so go straight to done.
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));

  // Extract the return value from the x87 FP stack: the entrypoint returns
  // its result in ST(0), so spill it and reload it into XMM0.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);

  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
  // The inputs: the string object and the character index.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register idx = locations->InAt(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick,
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  X86Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyway.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
    new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Register length,
                          SlowPathCode* slow_path,
                          Register input_len,
                          Register temp) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      // Check that length(input) >= length.
      __ cmpl(Address(input, length_offset), length);
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      __ cmpl(temp, length);
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    __ cmpl(temp, length);
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source.
  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
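  // REP MOVSW copies ECX 16-bit units from [ESI] to [EDI], advancing both
  // pointers (the direction flag is assumed clear, per the ABI).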
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs in the runtime calling convention, with the result in EAX.
1254   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1255                                                             LocationSummary::kCall,
1256                                                             kIntrinsified);
1257   InvokeRuntimeCallingConvention calling_convention;
1258   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1259   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1260   locations->SetOut(Location::RegisterLocation(EAX));
1261 }
1262 
VisitStringCompareTo(HInvoke * invoke)1263 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1264   X86Assembler* assembler = GetAssembler();
1265   LocationSummary* locations = invoke->GetLocations();
1266 
1267   // Note that the null check must have been done earlier.
1268   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1269 
1270   Register argument = locations->InAt(1).AsRegister<Register>();
1271   __ testl(argument, argument);
1272   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1273   codegen_->AddSlowPath(slow_path);
1274   __ j(kEqual, slow_path->GetEntryLabel());
1275 
1276   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
1277   __ Bind(slow_path->GetExitLabel());
1278 }
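// Note: only the argument null check is inlined here; the actual comparison
// is delegated to the pStringCompareTo runtime entrypoint through the
// FS-relative absolute call above.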
1279 
1280 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1281   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1282                                                             LocationSummary::kNoCall,
1283                                                             kIntrinsified);
1284   locations->SetInAt(0, Location::RequiresRegister());
1285   locations->SetInAt(1, Location::RequiresRegister());
1286 
1287   // Request temporary registers; ECX and EDI are needed for the repe_cmpsl instruction.
1288   locations->AddTemp(Location::RegisterLocation(ECX));
1289   locations->AddTemp(Location::RegisterLocation(EDI));
1290 
1291   // Set the output; ESI is needed for the repe_cmpsl instruction anyway.
1292   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1293 }
1294 
1295 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1296   X86Assembler* assembler = GetAssembler();
1297   LocationSummary* locations = invoke->GetLocations();
1298 
1299   Register str = locations->InAt(0).AsRegister<Register>();
1300   Register arg = locations->InAt(1).AsRegister<Register>();
1301   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1302   Register edi = locations->GetTemp(1).AsRegister<Register>();
1303   Register esi = locations->Out().AsRegister<Register>();
1304 
1305   NearLabel end, return_true, return_false;
1306 
1307   // Get offsets of count, value, and class fields within a string object.
1308   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1309   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1310   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1311 
1312   // Note that the null check must have been done earlier.
1313   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1314 
1315   StringEqualsOptimizations optimizations(invoke);
1316   if (!optimizations.GetArgumentNotNull()) {
1317     // Check if input is null, return false if it is.
1318     __ testl(arg, arg);
1319     __ j(kEqual, &return_false);
1320   }
1321 
1322   // Instanceof check for the argument by comparing class fields.
1323   // All string objects must have the same type since String cannot be subclassed.
1324   // Receiver must be a string object, so its class field is equal to all strings' class fields.
1325   // If the argument is a string object, its class field must be equal to receiver's class field.
1326   if (!optimizations.GetArgumentIsString()) {
1327     __ movl(ecx, Address(str, class_offset));
1328     __ cmpl(ecx, Address(arg, class_offset));
1329     __ j(kNotEqual, &return_false);
1330   }
1331 
1332   // Reference equality check, return true if same reference.
1333   __ cmpl(str, arg);
1334   __ j(kEqual, &return_true);
1335 
1336   // Load length of receiver string.
1337   __ movl(ecx, Address(str, count_offset));
1338   // Check if lengths are equal, return false if they're not.
1339   __ cmpl(ecx, Address(arg, count_offset));
1340   __ j(kNotEqual, &return_false);
1341   // Return true if both strings are empty.
1342   __ jecxz(&return_true);
1343 
1344   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1345   __ leal(esi, Address(str, value_offset));
1346   __ leal(edi, Address(arg, value_offset));
1347 
1348   // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
1349   __ addl(ecx, Immediate(1));
1350   __ shrl(ecx, Immediate(1));
1351 
1352   // Assertions that must hold in order to compare strings 2 characters at a time.
1353   DCHECK_ALIGNED(value_offset, 4);
1354   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
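  // Illustrative example (not generated code): for two strings of length 5,
  // ECX becomes (5 + 1) / 2 == 3, so repe_cmpsl compares 3 dwords, i.e. 6
  // chars; the 6th char is the zero padding guaranteed by the alignment
  // assertion above, so it compares equal in both strings.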
1355 
1356   // Loop to compare strings two characters at a time starting at the beginning of the string.
1357   __ repe_cmpsl();
1358   // If strings are not equal, zero flag will be cleared.
1359   __ j(kNotEqual, &return_false);
1360 
1361   // Return true and exit the function.
1362   // If loop does not result in returning false, we return true.
1363   __ Bind(&return_true);
1364   __ movl(esi, Immediate(1));
1365   __ jmp(&end);
1366 
1367   // Return false and exit the function.
1368   __ Bind(&return_false);
1369   __ xorl(esi, esi);
1370   __ Bind(&end);
1371 }
1372 
1373 static void CreateStringIndexOfLocations(HInvoke* invoke,
1374                                          ArenaAllocator* allocator,
1375                                          bool start_at_zero) {
1376   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1377                                                                LocationSummary::kCallOnSlowPath,
1378                                                                kIntrinsified);
1379   // The data needs to be in EDI for scasw, so request that the string be in EDI anyway.
1380   locations->SetInAt(0, Location::RegisterLocation(EDI));
1381   // If we look for a constant char, we'll still have to copy it into EAX anyway. So just
1382   // request the allocator to do that. We can still do the constant check by checking the
1383   // parameter of the instruction explicitly.
1384   // Note: This works as we don't clobber EAX anywhere.
1385   locations->SetInAt(1, Location::RegisterLocation(EAX));
1386   if (!start_at_zero) {
1387     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1388   }
1389   // As we clobber EDI during execution anyway, also use it as the output.
1390   locations->SetOut(Location::SameAsFirstInput());
1391 
1392   // repne scasw uses ECX as the counter.
1393   locations->AddTemp(Location::RegisterLocation(ECX));
1394   // Need another temporary to be able to compute the result.
1395   locations->AddTemp(Location::RequiresRegister());
1396 }
1397 
1398 static void GenerateStringIndexOf(HInvoke* invoke,
1399                                   X86Assembler* assembler,
1400                                   CodeGeneratorX86* codegen,
1401                                   ArenaAllocator* allocator,
1402                                   bool start_at_zero) {
1403   LocationSummary* locations = invoke->GetLocations();
1404 
1405   // Note that the null check must have been done earlier.
1406   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1407 
1408   Register string_obj = locations->InAt(0).AsRegister<Register>();
1409   Register search_value = locations->InAt(1).AsRegister<Register>();
1410   Register counter = locations->GetTemp(0).AsRegister<Register>();
1411   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1412   Register out = locations->Out().AsRegister<Register>();
1413 
1414   // Check our assumptions for registers.
1415   DCHECK_EQ(string_obj, EDI);
1416   DCHECK_EQ(search_value, EAX);
1417   DCHECK_EQ(counter, ECX);
1418   DCHECK_EQ(out, EDI);
1419 
1420   // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know
1421   // statically, or dispatch directly to the slow path if we have a constant.
1422   SlowPathCode* slow_path = nullptr;
1423   if (invoke->InputAt(1)->IsIntConstant()) {
1424     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
1425     std::numeric_limits<uint16_t>::max()) {
1426       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1427       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1428       slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1429       codegen->AddSlowPath(slow_path);
1430       __ jmp(slow_path->GetEntryLabel());
1431       __ Bind(slow_path->GetExitLabel());
1432       return;
1433     }
1434   } else {
1435     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1436     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1437     codegen->AddSlowPath(slow_path);
1438     __ j(kAbove, slow_path->GetEntryLabel());
1439   }
1440 
1441   // From here down, we know that we are looking for a char that fits in 16 bits.
1442   // Location of reference to data array within the String object.
1443   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1444   // Location of count within the String object.
1445   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1446 
1447   // Load string length, i.e., the count field of the string.
1448   __ movl(string_length, Address(string_obj, count_offset));
1449 
1450   // Do a zero-length check.
1451   // TODO: Support jecxz.
1452   NearLabel not_found_label;
1453   __ testl(string_length, string_length);
1454   __ j(kEqual, &not_found_label);
1455 
1456   if (start_at_zero) {
1457     // Number of chars to scan is the same as the string length.
1458     __ movl(counter, string_length);
1459 
1460     // Move to the start of the string.
1461     __ addl(string_obj, Immediate(value_offset));
1462   } else {
1463     Register start_index = locations->InAt(2).AsRegister<Register>();
1464 
1465     // Do a start_index check.
1466     __ cmpl(start_index, string_length);
1467     __ j(kGreaterEqual, &not_found_label);
1468 
1469     // Ensure we have a start index >= 0.
1470     __ xorl(counter, counter);
1471     __ cmpl(start_index, Immediate(0));
1472     __ cmovl(kGreater, counter, start_index);
1473 
1474     // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1475     __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1476 
1477     // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1478     // compare.
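    // Illustrative: for start_index == 2 and string_length == 5, counter
    // holds 2 here; negl turns it into -2 and the leal below yields
    // 5 + (-2) == 3 chars left to scan.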
1479     __ negl(counter);
1480     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1481   }
1482 
1483   // Everything is set up for repne scasw:
1484   //   * Comparison address in EDI.
1485   //   * Counter in ECX.
1486   __ repne_scasw();
1487 
1488   // Did we find a match?
1489   __ j(kNotEqual, &not_found_label);
1490 
1491   // Yes, we matched.  Compute the index of the result.
1492   __ subl(string_length, counter);
1493   __ leal(out, Address(string_length, -1));
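  // Illustrative: searching for 'c' in "abc" runs scasw three times, so ECX
  // is 3 - 3 == 0 at the match; string_length - ECX - 1 == 2, the expected
  // index.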
1494 
1495   NearLabel done;
1496   __ jmp(&done);
1497 
1498   // Failed to match; return -1.
1499   __ Bind(&not_found_label);
1500   __ movl(out, Immediate(-1));
1501 
1502   // And join up at the end.
1503   __ Bind(&done);
1504   if (slow_path != nullptr) {
1505     __ Bind(slow_path->GetExitLabel());
1506   }
1507 }
1508 
1509 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1510   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1511 }
1512 
1513 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1514   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1515 }
1516 
1517 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1518   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1519 }
1520 
1521 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1522   GenerateStringIndexOf(
1523       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1524 }
1525 
1526 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1527   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1528                                                             LocationSummary::kCall,
1529                                                             kIntrinsified);
1530   InvokeRuntimeCallingConvention calling_convention;
1531   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1532   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1533   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1534   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1535   locations->SetOut(Location::RegisterLocation(EAX));
1536 }
1537 
1538 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1539   X86Assembler* assembler = GetAssembler();
1540   LocationSummary* locations = invoke->GetLocations();
1541 
1542   Register byte_array = locations->InAt(0).AsRegister<Register>();
1543   __ testl(byte_array, byte_array);
1544   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1545   codegen_->AddSlowPath(slow_path);
1546   __ j(kEqual, slow_path->GetEntryLabel());
1547 
1548   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
1549   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1550   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1551   __ Bind(slow_path->GetExitLabel());
1552 }
1553 
1554 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1555   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1556                                                             LocationSummary::kCall,
1557                                                             kIntrinsified);
1558   InvokeRuntimeCallingConvention calling_convention;
1559   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1560   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1561   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1562   locations->SetOut(Location::RegisterLocation(EAX));
1563 }
1564 
1565 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1566   X86Assembler* assembler = GetAssembler();
1567 
1568   // No need to emit code checking whether `locations->InAt(2)` is a null
1569   // pointer, as callers of the native method
1570   //
1571   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1572   //
1573   // all include a null check on `data` before calling that method.
1574   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
1575   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1576   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1577 }
1578 
1579 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1580   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1581                                                             LocationSummary::kCall,
1582                                                             kIntrinsified);
1583   InvokeRuntimeCallingConvention calling_convention;
1584   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1585   locations->SetOut(Location::RegisterLocation(EAX));
1586 }
1587 
1588 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1589   X86Assembler* assembler = GetAssembler();
1590   LocationSummary* locations = invoke->GetLocations();
1591 
1592   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1593   __ testl(string_to_copy, string_to_copy);
1594   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1595   codegen_->AddSlowPath(slow_path);
1596   __ j(kEqual, slow_path->GetEntryLabel());
1597 
1598   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
1599   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1600   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1601   __ Bind(slow_path->GetExitLabel());
1602 }
1603 
1604 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1605   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1606   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1607                                                             LocationSummary::kNoCall,
1608                                                             kIntrinsified);
1609   locations->SetInAt(0, Location::RequiresRegister());
1610   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1611   // Place srcEnd in ECX to save a move below.
1612   locations->SetInAt(2, Location::RegisterLocation(ECX));
1613   locations->SetInAt(3, Location::RequiresRegister());
1614   locations->SetInAt(4, Location::RequiresRegister());
1615 
1616   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1617   // We don't have enough registers to also grab ECX, so we handle it below.
1618   locations->AddTemp(Location::RegisterLocation(ESI));
1619   locations->AddTemp(Location::RegisterLocation(EDI));
1620 }
1621 
1622 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1623   X86Assembler* assembler = GetAssembler();
1624   LocationSummary* locations = invoke->GetLocations();
1625 
1626   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1627   // Location of data in char array buffer.
1628   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1629   // Location of char array data in string.
1630   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1631 
1632   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1633   Register obj = locations->InAt(0).AsRegister<Register>();
1634   Location srcBegin = locations->InAt(1);
1635   int srcBegin_value =
1636     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1637   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1638   Register dst = locations->InAt(3).AsRegister<Register>();
1639   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1640 
1641   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1642   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1643   DCHECK_EQ(char_size, 2u);
1644 
1645   // Compute the address of the destination buffer.
1646   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1647 
1648   // Compute the address of the source string.
1649   if (srcBegin.IsConstant()) {
1650     // Compute the source address by adding the byte size of srcBegin chars to the
1651     // value offset of the string.
1652     __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset));
1653   } else {
1654     __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(),
1655                          ScaleFactor::TIMES_2, value_offset));
1656   }
1657 
1658   // Compute the number of chars (words) to move.
1659   // Now is the time to save ECX, since we don't know if it will be used later.
1660   __ pushl(ECX);
1661   int stack_adjust = kX86WordSize;
1662   __ cfi().AdjustCFAOffset(stack_adjust);
1663   DCHECK_EQ(srcEnd, ECX);
1664   if (srcBegin.IsConstant()) {
1665     if (srcBegin_value != 0) {
1666       __ subl(ECX, Immediate(srcBegin_value));
1667     }
1668   } else {
1669     DCHECK(srcBegin.IsRegister());
1670     __ subl(ECX, srcBegin.AsRegister<Register>());
1671   }
1672 
1673   // Do the move.
1674   __ rep_movsw();
1675 
1676   // And restore ECX.
1677   __ popl(ECX);
1678   __ cfi().AdjustCFAOffset(-stack_adjust);
1679 }
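// Illustrative recap (not generated code): for str.getChars(3, 9, dst, 1) the
// sequence above computes EDI = dst + data_offset + 1 * 2,
// ESI = str + value_offset + 3 * 2, and ECX = 9 - 3 == 6 chars for REP MOVSW,
// with the caller's ECX saved and restored around the copy.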
1680 
1681 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1682   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1683   Location out_loc = locations->Out();
1684   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1685   // to avoid a SIGBUS.
1686   switch (size) {
1687     case Primitive::kPrimByte:
1688       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1689       break;
1690     case Primitive::kPrimShort:
1691       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1692       break;
1693     case Primitive::kPrimInt:
1694       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1695       break;
1696     case Primitive::kPrimLong:
1697       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1698       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1699       break;
1700     default:
1701       LOG(FATAL) << "Type not recognized for peek: " << size;
1702       UNREACHABLE();
1703   }
1704 }
1705 
1706 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1707   CreateLongToIntLocations(arena_, invoke);
1708 }
1709 
1710 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1711   GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1712 }
1713 
1714 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1715   CreateLongToIntLocations(arena_, invoke);
1716 }
1717 
1718 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1719   GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1720 }
1721 
1722 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1723   CreateLongToLongLocations(arena_, invoke);
1724 }
1725 
1726 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1727   GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1728 }
1729 
1730 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1731   CreateLongToIntLocations(arena_, invoke);
1732 }
1733 
1734 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1735   GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1736 }
1737 
1738 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1739                                          HInvoke* invoke) {
1740   LocationSummary* locations = new (arena) LocationSummary(invoke,
1741                                                            LocationSummary::kNoCall,
1742                                                            kIntrinsified);
1743   locations->SetInAt(0, Location::RequiresRegister());
1744   HInstruction* value = invoke->InputAt(1);
1745   if (size == Primitive::kPrimByte) {
1746     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1747   } else {
1748     locations->SetInAt(1, Location::RegisterOrConstant(value));
1749   }
1750 }
1751 
1752 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1753   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1754   Location value_loc = locations->InAt(1);
1755   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1756   // to avoid a SIGBUS.
1757   switch (size) {
1758     case Primitive::kPrimByte:
1759       if (value_loc.IsConstant()) {
1760         __ movb(Address(address, 0),
1761                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1762       } else {
1763         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1764       }
1765       break;
1766     case Primitive::kPrimShort:
1767       if (value_loc.IsConstant()) {
1768         __ movw(Address(address, 0),
1769                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1770       } else {
1771         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1772       }
1773       break;
1774     case Primitive::kPrimInt:
1775       if (value_loc.IsConstant()) {
1776         __ movl(Address(address, 0),
1777                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1778       } else {
1779         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1780       }
1781       break;
1782     case Primitive::kPrimLong:
1783       if (value_loc.IsConstant()) {
1784         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1785         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1786         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1787       } else {
1788         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1789         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1790       }
1791       break;
1792     default:
1793       LOG(FATAL) << "Type not recognized for poke: " << size;
1794       UNREACHABLE();
1795   }
1796 }
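// Illustrative: poking the constant long 0x1122334455667788 emits two 32-bit
// stores, 0x55667788 to [addr] and 0x11223344 to [addr + 4]; the two halves
// are not written atomically with respect to each other.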
1797 
1798 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1799   CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1800 }
1801 
1802 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1803   GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1804 }
1805 
1806 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1807   CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1808 }
1809 
1810 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1811   GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1812 }
1813 
1814 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1815   CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1816 }
1817 
1818 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1819   GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1820 }
1821 
1822 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1823   CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1824 }
1825 
1826 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1827   GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1828 }
1829 
1830 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1831   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1832                                                             LocationSummary::kNoCall,
1833                                                             kIntrinsified);
1834   locations->SetOut(Location::RequiresRegister());
1835 }
1836 
1837 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1838   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
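  // Note (assumption about the runtime layout): on x86, ART reaches the
  // current art::Thread through the FS segment, so the Java Thread peer is
  // read with a single FS-relative absolute load.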
1839   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
1840 }
1841 
1842 static void GenUnsafeGet(HInvoke* invoke,
1843                          Primitive::Type type,
1844                          bool is_volatile,
1845                          CodeGeneratorX86* codegen) {
1846   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1847   LocationSummary* locations = invoke->GetLocations();
1848   Location base_loc = locations->InAt(1);
1849   Register base = base_loc.AsRegister<Register>();
1850   Location offset_loc = locations->InAt(2);
1851   Register offset = offset_loc.AsRegisterPairLow<Register>();
1852   Location output_loc = locations->Out();
1853 
1854   switch (type) {
1855     case Primitive::kPrimInt: {
1856       Register output = output_loc.AsRegister<Register>();
1857       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1858       break;
1859     }
1860 
1861     case Primitive::kPrimNot: {
1862       Register output = output_loc.AsRegister<Register>();
1863       if (kEmitCompilerReadBarrier) {
1864         if (kUseBakerReadBarrier) {
1865           Location temp = locations->GetTemp(0);
1866           codegen->GenerateArrayLoadWithBakerReadBarrier(
1867               invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
1868         } else {
1869           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1870           codegen->GenerateReadBarrierSlow(
1871               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1872         }
1873       } else {
1874         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1875         __ MaybeUnpoisonHeapReference(output);
1876       }
1877       break;
1878     }
1879 
1880     case Primitive::kPrimLong: {
1881         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1882         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1883         if (is_volatile) {
1884           // Need to use an XMM register to read the long atomically.
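          // Illustrative: if the field holds 0x1122334455667788, movsd reads
          // all 64 bits atomically; the first movd extracts 0x55667788, and
          // after psrlq by 32 the second movd extracts 0x11223344.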
1885           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1886           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1887           __ movd(output_lo, temp);
1888           __ psrlq(temp, Immediate(32));
1889           __ movd(output_hi, temp);
1890         } else {
1891           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1892           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1893         }
1894       }
1895       break;
1896 
1897     default:
1898       LOG(FATAL) << "Unsupported op size " << type;
1899       UNREACHABLE();
1900   }
1901 }
1902 
1903 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1904                                           HInvoke* invoke,
1905                                           Primitive::Type type,
1906                                           bool is_volatile) {
1907   bool can_call = kEmitCompilerReadBarrier &&
1908       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1909        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1910   LocationSummary* locations = new (arena) LocationSummary(invoke,
1911                                                            can_call ?
1912                                                                LocationSummary::kCallOnSlowPath :
1913                                                                LocationSummary::kNoCall,
1914                                                            kIntrinsified);
1915   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1916   locations->SetInAt(1, Location::RequiresRegister());
1917   locations->SetInAt(2, Location::RequiresRegister());
1918   if (type == Primitive::kPrimLong) {
1919     if (is_volatile) {
1920       // Need to use an XMM register to read the volatile long atomically.
1921       locations->AddTemp(Location::RequiresFpuRegister());
1922       locations->SetOut(Location::RequiresRegister());
1923     } else {
1924       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1925     }
1926   } else {
1927     locations->SetOut(Location::RequiresRegister());
1928   }
1929   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1930     // We need a temporary register for the read barrier marking slow
1931     // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
1932     locations->AddTemp(Location::RequiresRegister());
1933   }
1934 }
1935 
1936 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1937   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
1938 }
1939 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1940   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
1941 }
1942 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1943   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
1944 }
1945 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1946   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
1947 }
1948 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1949   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
1950 }
1951 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1952   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
1953 }
1954 
1955 
1956 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1957   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
1958 }
1959 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1960   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
1961 }
1962 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1963   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
1964 }
1965 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1966   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
1967 }
1968 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1969   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
1970 }
1971 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1972   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
1973 }
1974 
1975 
1976 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1977                                                        Primitive::Type type,
1978                                                        HInvoke* invoke,
1979                                                        bool is_volatile) {
1980   LocationSummary* locations = new (arena) LocationSummary(invoke,
1981                                                            LocationSummary::kNoCall,
1982                                                            kIntrinsified);
1983   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1984   locations->SetInAt(1, Location::RequiresRegister());
1985   locations->SetInAt(2, Location::RequiresRegister());
1986   locations->SetInAt(3, Location::RequiresRegister());
1987   if (type == Primitive::kPrimNot) {
1988     // Need temp registers for card-marking.
1989     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1990     // Ensure the value is in a byte register.
1991     locations->AddTemp(Location::RegisterLocation(ECX));
1992   } else if (type == Primitive::kPrimLong && is_volatile) {
1993     locations->AddTemp(Location::RequiresFpuRegister());
1994     locations->AddTemp(Location::RequiresFpuRegister());
1995   }
1996 }
1997 
1998 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1999   CreateIntIntIntIntToVoidPlusTempsLocations(
2000       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2001 }
2002 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2003   CreateIntIntIntIntToVoidPlusTempsLocations(
2004       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2005 }
2006 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2007   CreateIntIntIntIntToVoidPlusTempsLocations(
2008       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
2009 }
2010 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2011   CreateIntIntIntIntToVoidPlusTempsLocations(
2012       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2013 }
2014 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2015   CreateIntIntIntIntToVoidPlusTempsLocations(
2016       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2017 }
2018 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2019   CreateIntIntIntIntToVoidPlusTempsLocations(
2020       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
2021 }
2022 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2023   CreateIntIntIntIntToVoidPlusTempsLocations(
2024       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2025 }
2026 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2027   CreateIntIntIntIntToVoidPlusTempsLocations(
2028       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2029 }
2030 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2031   CreateIntIntIntIntToVoidPlusTempsLocations(
2032       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
2033 }
2034 
2035 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2036 // memory model.
2037 static void GenUnsafePut(LocationSummary* locations,
2038                          Primitive::Type type,
2039                          bool is_volatile,
2040                          CodeGeneratorX86* codegen) {
2041   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2042   Register base = locations->InAt(1).AsRegister<Register>();
2043   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2044   Location value_loc = locations->InAt(3);
2045 
2046   if (type == Primitive::kPrimLong) {
2047     Register value_lo = value_loc.AsRegisterPairLow<Register>();
2048     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2049     if (is_volatile) {
2050       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2051       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2052       __ movd(temp1, value_lo);
2053       __ movd(temp2, value_hi);
2054       __ punpckldq(temp1, temp2);
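      // temp1 now holds value_hi:value_lo as a single 64-bit lane, so the
      // movsd below writes both halves in one atomic 64-bit store.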
2055       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2056     } else {
2057       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2058       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2059     }
2060   } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2061     Register temp = locations->GetTemp(0).AsRegister<Register>();
2062     __ movl(temp, value_loc.AsRegister<Register>());
2063     __ PoisonHeapReference(temp);
2064     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2065   } else {
2066     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2067   }
2068 
2069   if (is_volatile) {
2070     codegen->MemoryFence();
2071   }
2072 
2073   if (type == Primitive::kPrimNot) {
2074     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2075     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2076                         locations->GetTemp(1).AsRegister<Register>(),
2077                         base,
2078                         value_loc.AsRegister<Register>(),
2079                         value_can_be_null);
2080   }
2081 }
2082 
2083 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2084   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2085 }
2086 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2087   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2088 }
2089 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2090   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2091 }
2092 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2093   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2094 }
2095 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2096   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2097 }
2098 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2099   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2100 }
2101 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2102   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2103 }
2104 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2105   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2106 }
2107 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2108   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2109 }
2110 
2111 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2112                                        HInvoke* invoke) {
2113   LocationSummary* locations = new (arena) LocationSummary(invoke,
2114                                                            LocationSummary::kNoCall,
2115                                                            kIntrinsified);
2116   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2117   locations->SetInAt(1, Location::RequiresRegister());
2118   // Offset is a long, but in 32 bit mode, we only need the low word.
2119   // Can we update the invoke here to remove a TypeConvert to Long?
2120   locations->SetInAt(2, Location::RequiresRegister());
2121   // Expected value must be in EAX or EDX:EAX.
2122   // For long, new value must be in ECX:EBX.
2123   if (type == Primitive::kPrimLong) {
2124     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2125     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2126   } else {
2127     locations->SetInAt(3, Location::RegisterLocation(EAX));
2128     locations->SetInAt(4, Location::RequiresRegister());
2129   }
2130 
2131   // Force a byte register for the output.
2132   locations->SetOut(Location::RegisterLocation(EAX));
2133   if (type == Primitive::kPrimNot) {
2134     // Need temp registers for card-marking.
2135     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2136     // Need a byte register for marking.
2137     locations->AddTemp(Location::RegisterLocation(ECX));
2138   }
2139 }
2140 
2141 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2142   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2143 }
2144 
2145 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2146   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2147 }
2148 
2149 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2150   // The UnsafeCASObject intrinsic is missing a read barrier, and
2151   // therefore sometimes does not work as expected (b/25883050).
2152   // Turn it off temporarily as a quick fix, until the read barrier is
2153   // implemented.
2154   //
2155   // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
2156   // this intrinsic.
2157   if (kEmitCompilerReadBarrier) {
2158     return;
2159   }
2160 
2161   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2162 }
2163 
2164 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2165   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2166   LocationSummary* locations = invoke->GetLocations();
2167 
2168   Register base = locations->InAt(1).AsRegister<Register>();
2169   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2170   Location out = locations->Out();
2171   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2172 
2173   if (type == Primitive::kPrimNot) {
2174     Register expected = locations->InAt(3).AsRegister<Register>();
2175     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
2176     DCHECK_EQ(expected, EAX);
2177     Register value = locations->InAt(4).AsRegister<Register>();
2178 
2179     // Mark card for object assuming new value is stored.
2180     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2181     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2182                         locations->GetTemp(1).AsRegister<Register>(),
2183                         base,
2184                         value,
2185                         value_can_be_null);
2186 
2187     bool base_equals_value = (base == value);
2188     if (kPoisonHeapReferences) {
2189       if (base_equals_value) {
2190         // If `base` and `value` are the same register location, move
2191         // `value` to a temporary register.  This way, poisoning
2192         // `value` won't invalidate `base`.
2193         value = locations->GetTemp(0).AsRegister<Register>();
2194         __ movl(value, base);
2195       }
2196 
2197       // Check that the register allocator did not assign the location
2198       // of `expected` (EAX) to `value` nor to `base`, so that heap
2199       // poisoning (when enabled) works as intended below.
2200       // - If `value` were equal to `expected`, both references would
2201       //   be poisoned twice, meaning they would not be poisoned at
2202       //   all, as heap poisoning uses address negation.
2203       // - If `base` were equal to `expected`, poisoning `expected`
2204       //   would invalidate `base`.
2205       DCHECK_NE(value, expected);
2206       DCHECK_NE(base, expected);
2207 
2208       __ PoisonHeapReference(expected);
2209       __ PoisonHeapReference(value);
2210     }
2211 
2212     // TODO: Add a read barrier for the reference stored in the object
2213     // before attempting the CAS, similar to the one in the
2214     // art::Unsafe_compareAndSwapObject JNI implementation.
2215     //
2216     // Note that this code is not (yet) used when read barriers are
2217     // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
2218     DCHECK(!kEmitCompilerReadBarrier);
2219     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2220 
2221     // LOCK CMPXCHG has full barrier semantics, and we don't need
2222     // scheduling barriers at this time.
2223 
2224     // Convert ZF into the boolean result.
2225     __ setb(kZero, out.AsRegister<Register>());
2226     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2227 
2228     // If heap poisoning is enabled, we need to unpoison the values
2229     // that were poisoned earlier.
2230     if (kPoisonHeapReferences) {
2231       if (base_equals_value) {
2232         // `value` has been moved to a temporary register, no need to
2233         // unpoison it.
2234       } else {
2235         // Ensure `value` is different from `out`, so that unpoisoning
2236         // the former does not invalidate the latter.
2237         DCHECK_NE(value, out.AsRegister<Register>());
2238         __ UnpoisonHeapReference(value);
2239       }
2240       // Do not unpoison the reference contained in register
2241       // `expected`, as it is the same as register `out` (EAX).
2242     }
2243   } else {
2244     if (type == Primitive::kPrimInt) {
2245       // Ensure the expected value is in EAX (required by the CMPXCHG
2246       // instruction).
2247       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2248       __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
2249                       locations->InAt(4).AsRegister<Register>());
2250     } else if (type == Primitive::kPrimLong) {
2251       // Ensure the expected value is in EAX:EDX and that the new
2252       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2253       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2254       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2255       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2256       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2257       __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
2258     } else {
2259       LOG(FATAL) << "Unexpected CAS type " << type;
2260     }
2261 
2262     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2263     // don't need scheduling barriers at this time.
2264 
2265     // Convert ZF into the boolean result.
2266     __ setb(kZero, out.AsRegister<Register>());
2267     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2268   }
2269 }
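// Illustrative mapping (not generated code): Unsafe.compareAndSwapInt(o, off,
// expected, x) becomes LOCK CMPXCHG [o + off], x with EAX == expected; the
// resulting ZF is then materialized into the boolean result via setb/movzxb.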
2270 
2271 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2272   GenCAS(Primitive::kPrimInt, invoke, codegen_);
2273 }
2274 
2275 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2276   GenCAS(Primitive::kPrimLong, invoke, codegen_);
2277 }
2278 
2279 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2280   GenCAS(Primitive::kPrimNot, invoke, codegen_);
2281 }
2282 
2283 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2284   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2285                                                            LocationSummary::kNoCall,
2286                                                            kIntrinsified);
2287   locations->SetInAt(0, Location::RequiresRegister());
2288   locations->SetOut(Location::SameAsFirstInput());
2289   locations->AddTemp(Location::RequiresRegister());
2290 }
2291 
2292 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2293                      X86Assembler* assembler) {
2294   Immediate imm_shift(shift);
2295   Immediate imm_mask(mask);
2296   __ movl(temp, reg);
2297   __ shrl(reg, imm_shift);
2298   __ andl(temp, imm_mask);
2299   __ andl(reg, imm_mask);
2300   __ shll(temp, imm_shift);
2301   __ orl(reg, temp);
2302 }
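// Illustrative: SwapBits(reg, temp, 1, 0x55555555, ...) swaps adjacent bit
// pairs. For reg == 0b0110: temp = 0b0110 & 0b0101 = 0b0100,
// reg = (0b0110 >> 1) & 0b0101 = 0b0001, then reg | (temp << 1) == 0b1001,
// i.e. each bit pair swapped.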
2303 
2304 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2305   X86Assembler* assembler = GetAssembler();
2306   LocationSummary* locations = invoke->GetLocations();
2307 
2308   Register reg = locations->InAt(0).AsRegister<Register>();
2309   Register temp = locations->GetTemp(0).AsRegister<Register>();
2310 
2311   /*
2312    * Use one bswap instruction to reverse the byte order first, and then use 3 rounds
2313    * of bit swapping to reverse the bits of x. Using bswap saves instructions compared
2314    * to the generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
2315    * x = bswap x
2316    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2317    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2318    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2319    */
2320   __ bswapl(reg);
2321   SwapBits(reg, temp, 1, 0x55555555, assembler);
2322   SwapBits(reg, temp, 2, 0x33333333, assembler);
2323   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2324 }
2325 
2326 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2327   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2328                                                            LocationSummary::kNoCall,
2329                                                            kIntrinsified);
2330   locations->SetInAt(0, Location::RequiresRegister());
2331   locations->SetOut(Location::SameAsFirstInput());
2332   locations->AddTemp(Location::RequiresRegister());
2333 }
2334 
2335 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2336   X86Assembler* assembler = GetAssembler();
2337   LocationSummary* locations = invoke->GetLocations();
2338 
2339   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2340   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2341   Register temp = locations->GetTemp(0).AsRegister<Register>();
2342 
2343   // We want to swap high/low, then bswap each one, and then do the same
2344   // as a 32-bit reverse.
2345   // Exchange high and low.
2346   __ movl(temp, reg_low);
2347   __ movl(reg_low, reg_high);
2348   __ movl(reg_high, temp);
2349 
2350   // bit-reverse low
2351   __ bswapl(reg_low);
2352   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2353   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2354   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2355 
2356   // bit-reverse high
2357   __ bswapl(reg_high);
2358   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2359   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2360   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2361 }
2362 
2363 static void CreateBitCountLocations(
2364     ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2365   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2366     // Do nothing if there is no popcnt support. This results in generating
2367     // a call for the intrinsic rather than direct code.
2368     return;
2369   }
2370   LocationSummary* locations = new (arena) LocationSummary(invoke,
2371                                                            LocationSummary::kNoCall,
2372                                                            kIntrinsified);
2373   if (is_long) {
2374     locations->AddTemp(Location::RequiresRegister());
2375   }
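  // POPCNT accepts a memory operand, so the input may also stay on the stack.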
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86Assembler* assembler,
                        CodeGeneratorX86* codegen,
                        HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ popcntl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ popcntl(out, Address(ESP, src.GetStackIndex()));
    }
  } else {
    // The 64-bit case needs to worry about two parts.
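    // popcount(x) for a 64-bit x decomposes as popcount(low 32 bits) +
    // popcount(high 32 bits), so count each half and add.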
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    if (src.IsRegisterPair()) {
      __ popcntl(temp, src.AsRegisterPairLow<Register>());
      __ popcntl(out, src.AsRegisterPairHigh<Register>());
    } else {
      DCHECK(src.IsDoubleStackSlot());
      __ popcntl(temp, Address(ESP, src.GetStackIndex()));
      __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
    }
    __ addl(out, temp);
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
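    // The 64-bit code below reads both halves directly from a register pair.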
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsrl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSR sets ZF if the input was zero; in that case its output is undefined.
    NearLabel all_zeroes, done;
    __ j(kEqual, &all_zeroes);

    // Correct the result from BSR to get the final CLZ result.
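    // CLZ = 31 - BSR, and for 0 <= i <= 31, 31 - i == i ^ 31, because
    // subtracting from the all-ones pattern 0b11111 can never borrow.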
    __ xorl(out, Immediate(31));
    __ jmp(&done);

    // Fix the zero case with the expected result.
    __ Bind(&all_zeroes);
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both halves of the register pair.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel handle_low, done, all_zeroes;

  // Is the high word zero?
  __ testl(src_hi, src_hi);
  __ j(kEqual, &handle_low);

  // High word is not zero. We know that the BSR result is defined in this case.
  __ bsrl(out, src_hi);

  // Correct the result from BSR to get the final CLZ result.
  __ xorl(out, Immediate(31));
  __ jmp(&done);

  // High word was zero.  We have to compute the low word count and add 32.
  __ Bind(&handle_low);
  __ bsrl(out, src_lo);
  __ j(kEqual, &all_zeroes);

  // We had a valid result.  Use an XOR to both correct the result and add 32.
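  // 32 + (31 - BSR(low)) == 63 - BSR(low) == BSR(low) ^ 63 for 0 <= BSR <= 31.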
  __ xorl(out, Immediate(63));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
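    // As with leading zeros, the 64-bit code reads both halves from a register pair.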
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86Assembler* assembler,
                             CodeGeneratorX86* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsfl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsfl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSF sets ZF if the input was zero; in that case its output is undefined.
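    // Unlike BSR, BSF needs no correction: the index of the lowest set bit
    // is already the trailing-zero count.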
    NearLabel done;
    __ j(kNotEqual, &done);

    // Fix the zero case with the expected result.
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both halves of the register pair.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel done, all_zeroes;

  // If the low word is zero, then ZF will be set.  If not, we have the answer.
  __ bsfl(out, src_lo);
  __ j(kNotEqual, &done);

  // Low word was zero.  We have to compute the high word count and add 32.
  __ bsfl(out, src_hi);
  __ j(kEqual, &all_zeroes);

  // We had a valid result.  Add 32 to account for the low word being zero.
  __ addl(out, Immediate(32));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)

// Intrinsics introduced in Java 1.8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art