/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : allocator_(codegen->GetGraph()->GetAllocator()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2`, and `temp3`
    // (resp.) are not used for the base source address, the base
    // destination address, and the end source address (resp.), as in
    // other SystemArrayCopy intrinsic code paths.  Instead they are
    // (resp.) used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).

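    // In pseudocode, the loop emitted below is roughly:
    //   for (i = 0; i != length; ++i) {
    //     dest[dest_pos + i] = ReadBarrier::Mark(src[src_pos + i]);
    //   }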
    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
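    // Copy the double into a scratch XMM register, extract the low 32 bits with
    // movd, then shift the register right by 32 and extract the high 32 bits.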
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
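    // Move each 32-bit half into its own XMM register, then interleave them with
    // punpckldq so the low quadword of `temp1` holds the full 64-bit value.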
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
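      // bswapl reverses all four bytes, leaving the swapped 16-bit value in the
      // upper half of the register; the arithmetic shift brings it back down and
      // sign-extends, matching the Java-level short result.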
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(allocator, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
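  // For example, for in = -2.5f: floor gives -3.0f, the difference is 0.5f, so
  // 1.0f is added back and the final result is -2, as Math.round requires.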
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
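    // 0.5f is pushed first and therefore sits at [ESP + 4]; 1.0f is pushed second
    // and sits at [ESP + 0].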
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);
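  // Stack layout for the call: the first double occupies [ESP, ESP + 8) and an
  // optional second double occupies [ESP + 8, ESP + 16).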

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenLowestOneBit(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            bool is_long,
                            HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Location out_loc = locations->Out();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      if (is_long) {
        __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
        __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
      } else {
        __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
      }
      return;
    }
    // Nonzero value.
    value = is_long ? CTZ(static_cast<uint64_t>(value))
                    : CTZ(static_cast<uint32_t>(value));
    if (is_long) {
      if (value >= 32) {
        int shift = value - 32;
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
      } else {
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
      }
    } else {
      codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
    }
    return;
  }
  // Handle non constant case
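  // Both branches below rely on the identity (x & -x), which isolates the lowest
  // set bit: e.g. x = 0b01100 gives -x = ...10100 and x & -x = 0b00100.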
  if (is_long) {
    DCHECK(src.IsRegisterPair());
    Register src_lo = src.AsRegisterPairLow<Register>();
    Register src_hi = src.AsRegisterPairHigh<Register>();

    Register out_lo = out_loc.AsRegisterPairLow<Register>();
    Register out_hi = out_loc.AsRegisterPairHigh<Register>();

    __ movl(out_lo, src_lo);
    __ movl(out_hi, src_hi);

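    // Negate the 64-bit pair in place: negate the low word, propagate the borrow
    // into the high word with adc, then negate the high word.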
    __ negl(out_lo);
    __ adcl(out_hi, Immediate(0));
    __ negl(out_hi);

    __ andl(out_lo, src_lo);
    __ andl(out_hi, src_hi);
  } else {
    if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
      Register out = out_loc.AsRegister<Register>();
      __ blsi(out, src.AsRegister<Register>());
    } else {
      Register out = out_loc.AsRegister<Register>();
      // Do tmp & -tmp
      if (src.IsRegister()) {
        __ movl(out, src.AsRegister<Register>());
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(out, Address(ESP, src.GetStackIndex()));
      }
      __ negl(out);

      if (src.IsRegister()) {
        __ andl(out, src.AsRegister<Register>());
      } else {
        __ andl(out, Address(ESP, src.GetStackIndex()));
      }
    }
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
}
void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
}

void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
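  // Checks that [pos, pos + length) fits within the length of `input`, jumping
  // to `slow_path` when any bounds check fails.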
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
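  // rep movsw copies ECX 16-bit units from [ESI] to [EDI], advancing both
  // pointers as it goes.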
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ ecx = str->klass_
    __ movl(ecx, Address(str, class_offset));
    // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if lengths and compression flags are equal, return false if they're not.
  // Two identical strings will always have same compression style since
  // compression style is decided on alloc.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if strings are empty. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  __ jecxz(&return_true);

  if (mirror::kUseStringCompression) {
    NearLabel string_uncompressed;
    // Extract length and differentiate between both compressed or both uncompressed.
    // Different compression style is cut above.
    __ shrl(ecx, Immediate(1));
    __ j(kCarrySet, &string_uncompressed);
    // Divide string length by 2, rounding up, and continue as if uncompressed.
    __ addl(ecx, Immediate(1));
    __ shrl(ecx, Immediate(1));
    __ Bind(&string_uncompressed);
  }
  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
  // divisible by 2.
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));

  // Assertions that must hold in order to compare strings 2 characters (uncompressed)
  // or 4 characters (compressed) at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
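  // repne scasw scans 16-bit words at [EDI] against AX, which is why both the
  // string data pointer (EDI) and the value to find (EAX) use fixed registers.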
1158   locations->SetInAt(1, Location::RegisterLocation(EAX));
1159   if (!start_at_zero) {
1160     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1161   }
1162   // As we clobber EDI during execution anyways, also use it as the output.
1163   locations->SetOut(Location::SameAsFirstInput());
1164 
1165   // repne scasw uses ECX as the counter.
1166   locations->AddTemp(Location::RegisterLocation(ECX));
1167   // Need another temporary to be able to compute the result.
1168   locations->AddTemp(Location::RequiresRegister());
1169   if (mirror::kUseStringCompression) {
1170     // Need another temporary to be able to save unflagged string length.
1171     locations->AddTemp(Location::RequiresRegister());
1172   }
1173 }
1174 
GenerateStringIndexOf(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,bool start_at_zero)1175 static void GenerateStringIndexOf(HInvoke* invoke,
1176                                   X86Assembler* assembler,
1177                                   CodeGeneratorX86* codegen,
1178                                   bool start_at_zero) {
1179   LocationSummary* locations = invoke->GetLocations();
1180 
1181   // Note that the null check must have been done earlier.
1182   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1183 
1184   Register string_obj = locations->InAt(0).AsRegister<Register>();
1185   Register search_value = locations->InAt(1).AsRegister<Register>();
1186   Register counter = locations->GetTemp(0).AsRegister<Register>();
1187   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1188   Register out = locations->Out().AsRegister<Register>();
1189   // Only used when string compression feature is on.
1190   Register string_length_flagged;
1191 
1192   // Check our assumptions for registers.
1193   DCHECK_EQ(string_obj, EDI);
1194   DCHECK_EQ(search_value, EAX);
1195   DCHECK_EQ(counter, ECX);
1196   DCHECK_EQ(out, EDI);
1197 
1198   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1199   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1200   SlowPathCode* slow_path = nullptr;
1201   HInstruction* code_point = invoke->InputAt(1);
1202   if (code_point->IsIntConstant()) {
1203     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1204     std::numeric_limits<uint16_t>::max()) {
1205       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1206       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1207       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1208       codegen->AddSlowPath(slow_path);
1209       __ jmp(slow_path->GetEntryLabel());
1210       __ Bind(slow_path->GetExitLabel());
1211       return;
1212     }
1213   } else if (code_point->GetType() != DataType::Type::kUint16) {
1214     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1215     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1216     codegen->AddSlowPath(slow_path);
1217     __ j(kAbove, slow_path->GetEntryLabel());
1218   }
1219 
1220   // From here down, we know that we are looking for a char that fits in 16 bits.
1221   // Location of reference to data array within the String object.
1222   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1223   // Location of count within the String object.
1224   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1225 
1226   // Load the count field of the string containing the length and compression flag.
1227   __ movl(string_length, Address(string_obj, count_offset));
1228 
1229   // Do a zero-length check. Even with string compression, `count == 0` means empty.
1230   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1231                 "Expecting 0=compressed, 1=uncompressed");
1232   // TODO: Support jecxz.
1233   NearLabel not_found_label;
1234   __ testl(string_length, string_length);
1235   __ j(kEqual, &not_found_label);
1236 
1237   if (mirror::kUseStringCompression) {
1238     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1239     __ movl(string_length_flagged, string_length);
1240     // Extract the length and shift out the least significant bit used as compression flag.
1241     __ shrl(string_length, Immediate(1));
1242   }
1243 
1244   if (start_at_zero) {
1245     // Number of chars to scan is the same as the string length.
1246     __ movl(counter, string_length);
1247 
1248     // Move to the start of the string.
1249     __ addl(string_obj, Immediate(value_offset));
1250   } else {
1251     Register start_index = locations->InAt(2).AsRegister<Register>();
1252 
1253     // Do a start_index check.
1254     __ cmpl(start_index, string_length);
1255     __ j(kGreaterEqual, &not_found_label);
1256 
1257     // Ensure we have a start index >= 0.
1258     __ xorl(counter, counter);
1259     __ cmpl(start_index, Immediate(0));
1260     __ cmovl(kGreater, counter, start_index);
1261 
1262     if (mirror::kUseStringCompression) {
1263       NearLabel modify_counter, offset_uncompressed_label;
1264       __ testl(string_length_flagged, Immediate(1));
1265       __ j(kNotZero, &offset_uncompressed_label);
1266       // Move to the start of the string: string_obj + value_offset + start_index.
1267       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1268       __ jmp(&modify_counter);
1269 
1270       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1271       __ Bind(&offset_uncompressed_label);
1272       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1273 
1274       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1275       // compare.
1276       __ Bind(&modify_counter);
1277     } else {
1278       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1279     }
1280     __ negl(counter);
1281     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
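         // counter = string_length - start_index: negate the clamped start index and add the
         // length via LEA, leaving string_length itself intact.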
1282   }
1283 
1284   if (mirror::kUseStringCompression) {
1285     NearLabel uncompressed_string_comparison;
1286     NearLabel comparison_done;
1287     __ testl(string_length_flagged, Immediate(1));
1288     __ j(kNotZero, &uncompressed_string_comparison);
1289 
1290     // Check if EAX (search_value) is ASCII.
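         // A compressed string holds only ASCII characters (values <= 127), so a larger search
         // value can never match; report not-found without scanning.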
1291     __ cmpl(search_value, Immediate(127));
1292     __ j(kGreater, &not_found_label);
1293     // Compare byte by byte.
1294     __ repne_scasb();
1295     __ jmp(&comparison_done);
1296 
1297     // Everything is set up for repne scasw:
1298     //   * Comparison address in EDI.
1299     //   * Counter in ECX.
1300     __ Bind(&uncompressed_string_comparison);
1301     __ repne_scasw();
1302     __ Bind(&comparison_done);
1303   } else {
1304     __ repne_scasw();
1305   }
1306   // Did we find a match?
1307   __ j(kNotEqual, &not_found_label);
1308 
1309   // Yes, we matched.  Compute the index of the result.
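       // repne scas decrements ECX once per element scanned, including the matching one, so
       // string_length - ECX is one past the matched index.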
1310   __ subl(string_length, counter);
1311   __ leal(out, Address(string_length, -1));
1312 
1313   NearLabel done;
1314   __ jmp(&done);
1315 
1316   // Failed to match; return -1.
1317   __ Bind(&not_found_label);
1318   __ movl(out, Immediate(-1));
1319 
1320   // And join up at the end.
1321   __ Bind(&done);
1322   if (slow_path != nullptr) {
1323     __ Bind(slow_path->GetExitLabel());
1324   }
1325 }
1326 
VisitStringIndexOf(HInvoke * invoke)1327 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1328   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1329 }
1330 
VisitStringIndexOf(HInvoke * invoke)1331 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1332   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1333 }
1334 
VisitStringIndexOfAfter(HInvoke * invoke)1335 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1336   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1337 }
1338 
VisitStringIndexOfAfter(HInvoke * invoke)1339 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1340   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1341 }
1342 
VisitStringNewStringFromBytes(HInvoke * invoke)1343 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1344   LocationSummary* locations = new (allocator_) LocationSummary(
1345       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1346   InvokeRuntimeCallingConvention calling_convention;
1347   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1348   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1349   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1350   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1351   locations->SetOut(Location::RegisterLocation(EAX));
1352 }
1353 
VisitStringNewStringFromBytes(HInvoke * invoke)1354 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1355   X86Assembler* assembler = GetAssembler();
1356   LocationSummary* locations = invoke->GetLocations();
1357 
1358   Register byte_array = locations->InAt(0).AsRegister<Register>();
1359   __ testl(byte_array, byte_array);
1360   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1361   codegen_->AddSlowPath(slow_path);
1362   __ j(kEqual, slow_path->GetEntryLabel());
1363 
1364   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1365   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1366   __ Bind(slow_path->GetExitLabel());
1367 }
1368 
VisitStringNewStringFromChars(HInvoke * invoke)1369 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1370   LocationSummary* locations =
1371       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1372   InvokeRuntimeCallingConvention calling_convention;
1373   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1374   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1375   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1376   locations->SetOut(Location::RegisterLocation(EAX));
1377 }
1378 
VisitStringNewStringFromChars(HInvoke * invoke)1379 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1380   // No need to emit code checking whether `locations->InAt(2)` is a null
1381   // pointer, as callers of the native method
1382   //
1383   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1384   //
1385   // all include a null check on `data` before calling that method.
1386   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1387   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1388 }
1389 
VisitStringNewStringFromString(HInvoke * invoke)1390 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1391   LocationSummary* locations = new (allocator_) LocationSummary(
1392       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1393   InvokeRuntimeCallingConvention calling_convention;
1394   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1395   locations->SetOut(Location::RegisterLocation(EAX));
1396 }
1397 
VisitStringNewStringFromString(HInvoke * invoke)1398 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1399   X86Assembler* assembler = GetAssembler();
1400   LocationSummary* locations = invoke->GetLocations();
1401 
1402   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1403   __ testl(string_to_copy, string_to_copy);
1404   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1405   codegen_->AddSlowPath(slow_path);
1406   __ j(kEqual, slow_path->GetEntryLabel());
1407 
1408   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1409   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1410   __ Bind(slow_path->GetExitLabel());
1411 }
1412 
VisitStringGetCharsNoCheck(HInvoke * invoke)1413 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1414   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1415   LocationSummary* locations =
1416       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1417   locations->SetInAt(0, Location::RequiresRegister());
1418   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1419   // Place srcEnd in ECX to save a move below.
1420   locations->SetInAt(2, Location::RegisterLocation(ECX));
1421   locations->SetInAt(3, Location::RequiresRegister());
1422   locations->SetInAt(4, Location::RequiresRegister());
1423 
1424   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1425   // We don't have enough registers to also reserve ECX, so it is saved and restored below.
1426   locations->AddTemp(Location::RegisterLocation(ESI));
1427   locations->AddTemp(Location::RegisterLocation(EDI));
1428 }
1429 
VisitStringGetCharsNoCheck(HInvoke * invoke)1430 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1431   X86Assembler* assembler = GetAssembler();
1432   LocationSummary* locations = invoke->GetLocations();
1433 
1434   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1435   // Location of data in char array buffer.
1436   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1437   // Location of char array data in string.
1438   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1439 
1440   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1441   Register obj = locations->InAt(0).AsRegister<Register>();
1442   Location srcBegin = locations->InAt(1);
1443   int srcBegin_value =
1444     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1445   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1446   Register dst = locations->InAt(3).AsRegister<Register>();
1447   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1448 
1449   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1450   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1451   DCHECK_EQ(char_size, 2u);
1452 
1453   // Compute the number of chars (words) to move.
1454   // Save ECX, since we don't know if it will be used later.
1455   __ pushl(ECX);
1456   int stack_adjust = kX86WordSize;
1457   __ cfi().AdjustCFAOffset(stack_adjust);
1458   DCHECK_EQ(srcEnd, ECX);
1459   if (srcBegin.IsConstant()) {
1460     __ subl(ECX, Immediate(srcBegin_value));
1461   } else {
1462     DCHECK(srcBegin.IsRegister());
1463     __ subl(ECX, srcBegin.AsRegister<Register>());
1464   }
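       // ECX now holds srcEnd - srcBegin, the number of chars to copy.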
1465 
1466   NearLabel done;
1467   if (mirror::kUseStringCompression) {
1468     // Location of count within the String object.
1469     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1470     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1471     DCHECK_EQ(c_char_size, 1u);
1472     __ pushl(EAX);
1473     __ cfi().AdjustCFAOffset(stack_adjust);
1474 
1475     NearLabel copy_loop, copy_uncompressed;
1476     __ testl(Address(obj, count_offset), Immediate(1));
1477     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1478                   "Expecting 0=compressed, 1=uncompressed");
1479     __ j(kNotZero, &copy_uncompressed);
1480     // Compute the source address by adding srcBegin (one byte per char for a compressed
1481     // string) to the value offset within the String object.
1482     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1483 
1484     // Copy the compressed String's value into the char array.
1485     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1486     __ Bind(&copy_loop);
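         // jecxz exits the loop once ECX (the number of chars left to copy) reaches zero.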
1487     __ jecxz(&done);
1488     // Use EAX as a temporary to zero-extend the byte at [ESI] into a char value.
1489     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1490     __ movzxb(EAX, Address(ESI, 0));
1491     __ movw(Address(EDI, 0), EAX);
1492     __ leal(EDI, Address(EDI, char_size));
1493     __ leal(ESI, Address(ESI, c_char_size));
1494     // TODO: Add support for LOOP to X86Assembler.
1495     __ subl(ECX, Immediate(1));
1496     __ jmp(&copy_loop);
1497     __ Bind(&copy_uncompressed);
1498   }
1499 
1500   // Do the copy for uncompressed string.
1501   // Compute the address of the destination buffer.
1502   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1503   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1504   __ rep_movsw();
1505 
1506   __ Bind(&done);
1507   if (mirror::kUseStringCompression) {
1508     // Restore EAX.
1509     __ popl(EAX);
1510     __ cfi().AdjustCFAOffset(-stack_adjust);
1511   }
1512   // Restore ECX.
1513   __ popl(ECX);
1514   __ cfi().AdjustCFAOffset(-stack_adjust);
1515 }
1516 
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1517 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1518   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1519   Location out_loc = locations->Out();
1520   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1521   // to avoid a SIGBUS.
1522   switch (size) {
1523     case DataType::Type::kInt8:
1524       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1525       break;
1526     case DataType::Type::kInt16:
1527       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1528       break;
1529     case DataType::Type::kInt32:
1530       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1531       break;
1532     case DataType::Type::kInt64:
1533       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1534       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1535       break;
1536     default:
1537       LOG(FATAL) << "Type not recognized for peek: " << size;
1538       UNREACHABLE();
1539   }
1540 }
1541 
VisitMemoryPeekByte(HInvoke * invoke)1542 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1543   CreateLongToIntLocations(allocator_, invoke);
1544 }
1545 
VisitMemoryPeekByte(HInvoke * invoke)1546 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1547   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1548 }
1549 
VisitMemoryPeekIntNative(HInvoke * invoke)1550 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1551   CreateLongToIntLocations(allocator_, invoke);
1552 }
1553 
VisitMemoryPeekIntNative(HInvoke * invoke)1554 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1555   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1556 }
1557 
VisitMemoryPeekLongNative(HInvoke * invoke)1558 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1559   CreateLongToLongLocations(allocator_, invoke);
1560 }
1561 
VisitMemoryPeekLongNative(HInvoke * invoke)1562 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1563   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1564 }
1565 
VisitMemoryPeekShortNative(HInvoke * invoke)1566 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1567   CreateLongToIntLocations(allocator_, invoke);
1568 }
1569 
VisitMemoryPeekShortNative(HInvoke * invoke)1570 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1571   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1572 }
1573 
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1574 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1575                                          DataType::Type size,
1576                                          HInvoke* invoke) {
1577   LocationSummary* locations =
1578       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1579   locations->SetInAt(0, Location::RequiresRegister());
1580   HInstruction* value = invoke->InputAt(1);
1581   if (size == DataType::Type::kInt8) {
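         // movb needs a byte-addressable register (EAX, EBX, ECX or EDX), so pin a non-constant
         // value to EDX.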
1582     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1583   } else {
1584     locations->SetInAt(1, Location::RegisterOrConstant(value));
1585   }
1586 }
1587 
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1588 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1589   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1590   Location value_loc = locations->InAt(1);
1591   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1592   // to avoid a SIGBUS.
1593   switch (size) {
1594     case DataType::Type::kInt8:
1595       if (value_loc.IsConstant()) {
1596         __ movb(Address(address, 0),
1597                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1598       } else {
1599         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1600       }
1601       break;
1602     case DataType::Type::kInt16:
1603       if (value_loc.IsConstant()) {
1604         __ movw(Address(address, 0),
1605                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1606       } else {
1607         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1608       }
1609       break;
1610     case DataType::Type::kInt32:
1611       if (value_loc.IsConstant()) {
1612         __ movl(Address(address, 0),
1613                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1614       } else {
1615         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1616       }
1617       break;
1618     case DataType::Type::kInt64:
1619       if (value_loc.IsConstant()) {
1620         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1621         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1622         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1623       } else {
1624         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1625         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1626       }
1627       break;
1628     default:
1629       LOG(FATAL) << "Type not recognized for poke: " << size;
1630       UNREACHABLE();
1631   }
1632 }
1633 
VisitMemoryPokeByte(HInvoke * invoke)1634 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1635   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1636 }
1637 
VisitMemoryPokeByte(HInvoke * invoke)1638 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1639   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1640 }
1641 
VisitMemoryPokeIntNative(HInvoke * invoke)1642 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1643   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1644 }
1645 
VisitMemoryPokeIntNative(HInvoke * invoke)1646 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1647   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1648 }
1649 
VisitMemoryPokeLongNative(HInvoke * invoke)1650 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1651   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1652 }
1653 
VisitMemoryPokeLongNative(HInvoke * invoke)1654 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1655   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1656 }
1657 
VisitMemoryPokeShortNative(HInvoke * invoke)1658 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1659   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1660 }
1661 
VisitMemoryPokeShortNative(HInvoke * invoke)1662 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1663   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1664 }
1665 
VisitThreadCurrentThread(HInvoke * invoke)1666 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1667   LocationSummary* locations =
1668       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1669   locations->SetOut(Location::RequiresRegister());
1670 }
1671 
VisitThreadCurrentThread(HInvoke * invoke)1672 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1673   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
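       // The java.lang.Thread peer is stored in the runtime Thread object, which is addressed
       // through the FS segment register on x86.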
1674   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1675 }
1676 
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1677 static void GenUnsafeGet(HInvoke* invoke,
1678                          DataType::Type type,
1679                          bool is_volatile,
1680                          CodeGeneratorX86* codegen) {
1681   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1682   LocationSummary* locations = invoke->GetLocations();
1683   Location base_loc = locations->InAt(1);
1684   Register base = base_loc.AsRegister<Register>();
1685   Location offset_loc = locations->InAt(2);
1686   Register offset = offset_loc.AsRegisterPairLow<Register>();
1687   Location output_loc = locations->Out();
1688 
1689   switch (type) {
1690     case DataType::Type::kInt32: {
1691       Register output = output_loc.AsRegister<Register>();
1692       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1693       break;
1694     }
1695 
1696     case DataType::Type::kReference: {
1697       Register output = output_loc.AsRegister<Register>();
1698       if (kEmitCompilerReadBarrier) {
1699         if (kUseBakerReadBarrier) {
1700           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1701           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1702               invoke, output_loc, base, src, /* needs_null_check= */ false);
1703         } else {
1704           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1705           codegen->GenerateReadBarrierSlow(
1706               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1707         }
1708       } else {
1709         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1710         __ MaybeUnpoisonHeapReference(output);
1711       }
1712       break;
1713     }
1714 
1715     case DataType::Type::kInt64: {
1716         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1717         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1718         if (is_volatile) {
1719           // Need to use an XMM register to read the 64-bit value atomically.
1720           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1721           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
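               // Split the 64-bit value into the output pair: movd extracts the low half, then
               // shift right by 32 and extract the high half.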
1722           __ movd(output_lo, temp);
1723           __ psrlq(temp, Immediate(32));
1724           __ movd(output_hi, temp);
1725         } else {
1726           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1727           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1728         }
1729       }
1730       break;
1731 
1732     default:
1733       LOG(FATAL) << "Unsupported op size " << type;
1734       UNREACHABLE();
1735   }
1736 }
1737 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1738 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1739                                           HInvoke* invoke,
1740                                           DataType::Type type,
1741                                           bool is_volatile) {
1742   bool can_call = kEmitCompilerReadBarrier &&
1743       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1744        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1745   LocationSummary* locations =
1746       new (allocator) LocationSummary(invoke,
1747                                       can_call
1748                                           ? LocationSummary::kCallOnSlowPath
1749                                           : LocationSummary::kNoCall,
1750                                       kIntrinsified);
1751   if (can_call && kUseBakerReadBarrier) {
1752     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1753   }
1754   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1755   locations->SetInAt(1, Location::RequiresRegister());
1756   locations->SetInAt(2, Location::RequiresRegister());
1757   if (type == DataType::Type::kInt64) {
1758     if (is_volatile) {
1759       // Need an XMM temporary to read the volatile 64-bit value atomically.
1760       locations->AddTemp(Location::RequiresFpuRegister());
1761       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1762     } else {
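           // The two 32-bit loads still need the base and offset inputs after the first half of
           // the output pair is written, so the output must not share registers with them.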
1763       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1764     }
1765   } else {
1766     locations->SetOut(Location::RequiresRegister(),
1767                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1768   }
1769 }
1770 
VisitUnsafeGet(HInvoke * invoke)1771 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1772   CreateIntIntIntToIntLocations(
1773       allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1774 }
VisitUnsafeGetVolatile(HInvoke * invoke)1775 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1776   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1777 }
VisitUnsafeGetLong(HInvoke * invoke)1778 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1779   CreateIntIntIntToIntLocations(
1780       allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1781 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1782 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1783   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1784 }
VisitUnsafeGetObject(HInvoke * invoke)1785 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1786   CreateIntIntIntToIntLocations(
1787       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1788 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1789 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1790   CreateIntIntIntToIntLocations(
1791       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1792 }
1793 
1794 
VisitUnsafeGet(HInvoke * invoke)1795 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1796   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1797 }
VisitUnsafeGetVolatile(HInvoke * invoke)1798 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1799   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1800 }
VisitUnsafeGetLong(HInvoke * invoke)1801 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1802   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1803 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1804 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1805   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1806 }
VisitUnsafeGetObject(HInvoke * invoke)1807 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1808   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1809 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1810 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1811   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1812 }
1813 
1814 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1815 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1816                                                        DataType::Type type,
1817                                                        HInvoke* invoke,
1818                                                        bool is_volatile) {
1819   LocationSummary* locations =
1820       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1821   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1822   locations->SetInAt(1, Location::RequiresRegister());
1823   locations->SetInAt(2, Location::RequiresRegister());
1824   locations->SetInAt(3, Location::RequiresRegister());
1825   if (type == DataType::Type::kReference) {
1826     // Need temp registers for card-marking.
1827     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1828     // Ensure the value is in a byte register.
1829     locations->AddTemp(Location::RegisterLocation(ECX));
1830   } else if (type == DataType::Type::kInt64 && is_volatile) {
1831     locations->AddTemp(Location::RequiresFpuRegister());
1832     locations->AddTemp(Location::RequiresFpuRegister());
1833   }
1834 }
1835 
VisitUnsafePut(HInvoke * invoke)1836 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1837   CreateIntIntIntIntToVoidPlusTempsLocations(
1838       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1839 }
VisitUnsafePutOrdered(HInvoke * invoke)1840 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1841   CreateIntIntIntIntToVoidPlusTempsLocations(
1842       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1843 }
VisitUnsafePutVolatile(HInvoke * invoke)1844 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1845   CreateIntIntIntIntToVoidPlusTempsLocations(
1846       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1847 }
VisitUnsafePutObject(HInvoke * invoke)1848 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1849   CreateIntIntIntIntToVoidPlusTempsLocations(
1850       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1851 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1852 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1853   CreateIntIntIntIntToVoidPlusTempsLocations(
1854       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1855 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1856 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1857   CreateIntIntIntIntToVoidPlusTempsLocations(
1858       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1859 }
VisitUnsafePutLong(HInvoke * invoke)1860 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1861   CreateIntIntIntIntToVoidPlusTempsLocations(
1862       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1863 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1864 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1865   CreateIntIntIntIntToVoidPlusTempsLocations(
1866       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1867 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1868 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1869   CreateIntIntIntIntToVoidPlusTempsLocations(
1870       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1871 }
1872 
1873 // We don't need special handling for ordered writes: they require an AnyStore barrier, which the
1874 // x86 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1875 static void GenUnsafePut(LocationSummary* locations,
1876                          DataType::Type type,
1877                          bool is_volatile,
1878                          CodeGeneratorX86* codegen) {
1879   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1880   Register base = locations->InAt(1).AsRegister<Register>();
1881   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1882   Location value_loc = locations->InAt(3);
1883 
1884   if (type == DataType::Type::kInt64) {
1885     Register value_lo = value_loc.AsRegisterPairLow<Register>();
1886     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1887     if (is_volatile) {
1888       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1889       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
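           // punpckldq interleaves the low dwords of temp1 and temp2, forming value_hi:value_lo in
           // temp1 so that a single movsd stores all 64 bits atomically.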
1890       __ movd(temp1, value_lo);
1891       __ movd(temp2, value_hi);
1892       __ punpckldq(temp1, temp2);
1893       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1894     } else {
1895       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1896       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1897     }
1898   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1899     Register temp = locations->GetTemp(0).AsRegister<Register>();
1900     __ movl(temp, value_loc.AsRegister<Register>());
1901     __ PoisonHeapReference(temp);
1902     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1903   } else {
1904     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1905   }
1906 
1907   if (is_volatile) {
1908     codegen->MemoryFence();
1909   }
1910 
1911   if (type == DataType::Type::kReference) {
1912     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1913     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1914                         locations->GetTemp(1).AsRegister<Register>(),
1915                         base,
1916                         value_loc.AsRegister<Register>(),
1917                         value_can_be_null);
1918   }
1919 }
1920 
VisitUnsafePut(HInvoke * invoke)1921 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1922   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1923 }
VisitUnsafePutOrdered(HInvoke * invoke)1924 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1925   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1926 }
VisitUnsafePutVolatile(HInvoke * invoke)1927 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1928   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1929 }
VisitUnsafePutObject(HInvoke * invoke)1930 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1931   GenUnsafePut(
1932       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1933 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1934 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1935   GenUnsafePut(
1936       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1937 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1938 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1939   GenUnsafePut(
1940       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1941 }
VisitUnsafePutLong(HInvoke * invoke)1942 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1943   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1944 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1945 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1946   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1947 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1948 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1949   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1950 }
1951 
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1952 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1953                                        DataType::Type type,
1954                                        HInvoke* invoke) {
1955   bool can_call = kEmitCompilerReadBarrier &&
1956       kUseBakerReadBarrier &&
1957       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1958   LocationSummary* locations =
1959       new (allocator) LocationSummary(invoke,
1960                                       can_call
1961                                           ? LocationSummary::kCallOnSlowPath
1962                                           : LocationSummary::kNoCall,
1963                                       kIntrinsified);
1964   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1965   locations->SetInAt(1, Location::RequiresRegister());
1966   // The offset is a long, but in 32-bit mode we only need the low word.
1967   // Can we update the invoke here to remove a TypeConvert to Long?
1968   locations->SetInAt(2, Location::RequiresRegister());
1969   // Expected value must be in EAX or EDX:EAX.
1970   // For long, new value must be in ECX:EBX.
1971   if (type == DataType::Type::kInt64) {
1972     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1973     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1974   } else {
1975     locations->SetInAt(3, Location::RegisterLocation(EAX));
1976     locations->SetInAt(4, Location::RequiresRegister());
1977   }
1978 
1979   // Force a byte register for the output.
1980   locations->SetOut(Location::RegisterLocation(EAX));
1981   if (type == DataType::Type::kReference) {
1982     // Need temporary registers for card-marking, and possibly for
1983     // (Baker) read barrier.
1984     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1985     // Need a byte register for marking.
1986     locations->AddTemp(Location::RegisterLocation(ECX));
1987   }
1988 }
1989 
VisitUnsafeCASInt(HInvoke * invoke)1990 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1991   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1992 }
1993 
VisitUnsafeCASLong(HInvoke * invoke)1994 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1995   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1996 }
1997 
VisitUnsafeCASObject(HInvoke * invoke)1998 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1999   // The only read barrier implementation supporting the
2000   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2001   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2002     return;
2003   }
2004 
2005   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2006 }
2007 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2008 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2009   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2010   LocationSummary* locations = invoke->GetLocations();
2011 
2012   Register base = locations->InAt(1).AsRegister<Register>();
2013   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2014   Location out = locations->Out();
2015   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2016 
2017   // The address of the field within the holding object.
2018   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2019 
2020   if (type == DataType::Type::kReference) {
2021     // The only read barrier implementation supporting the
2022     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2023     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2024 
2025     Location temp1_loc = locations->GetTemp(0);
2026     Register temp1 = temp1_loc.AsRegister<Register>();
2027     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2028 
2029     Register expected = locations->InAt(3).AsRegister<Register>();
2030     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
2031     DCHECK_EQ(expected, EAX);
2032     Register value = locations->InAt(4).AsRegister<Register>();
2033 
2034     // Mark card for object assuming new value is stored.
2035     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2036     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2037 
2038     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2039       // Need to make sure the reference stored in the field is a to-space
2040       // one before attempting the CAS or the CAS could fail incorrectly.
2041       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2042           invoke,
2043           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
2044           base,
2045           field_addr,
2046           /* needs_null_check= */ false,
2047           /* always_update_field= */ true,
2048           &temp2);
2049     }
2050 
2051     bool base_equals_value = (base == value);
2052     if (kPoisonHeapReferences) {
2053       if (base_equals_value) {
2054         // If `base` and `value` are the same register location, move
2055         // `value` to a temporary register.  This way, poisoning
2056         // `value` won't invalidate `base`.
2057         value = temp1;
2058         __ movl(value, base);
2059       }
2060 
2061       // Check that the register allocator did not assign the location
2062       // of `expected` (EAX) to `value` nor to `base`, so that heap
2063       // poisoning (when enabled) works as intended below.
2064       // - If `value` were equal to `expected`, both references would
2065       //   be poisoned twice, meaning they would not be poisoned at
2066       //   all, as heap poisoning uses address negation.
2067       // - If `base` were equal to `expected`, poisoning `expected`
2068       //   would invalidate `base`.
2069       DCHECK_NE(value, expected);
2070       DCHECK_NE(base, expected);
2071 
2072       __ PoisonHeapReference(expected);
2073       __ PoisonHeapReference(value);
2074     }
2075 
2076     __ LockCmpxchgl(field_addr, value);
2077 
2078     // LOCK CMPXCHG has full barrier semantics, and we don't need
2079     // scheduling barriers at this time.
2080 
2081     // Convert ZF into the Boolean result.
2082     __ setb(kZero, out.AsRegister<Register>());
2083     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2084 
2085     // If heap poisoning is enabled, we need to unpoison the values
2086     // that were poisoned earlier.
2087     if (kPoisonHeapReferences) {
2088       if (base_equals_value) {
2089         // `value` has been moved to a temporary register, no need to
2090         // unpoison it.
2091       } else {
2092         // Ensure `value` is different from `out`, so that unpoisoning
2093         // the former does not invalidate the latter.
2094         DCHECK_NE(value, out.AsRegister<Register>());
2095         __ UnpoisonHeapReference(value);
2096       }
2097       // Do not unpoison the reference contained in register
2098       // `expected`, as it is the same as register `out` (EAX).
2099     }
2100   } else {
2101     if (type == DataType::Type::kInt32) {
2102       // Ensure the expected value is in EAX (required by the CMPXCHG
2103       // instruction).
2104       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2105       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2106     } else if (type == DataType::Type::kInt64) {
2107       // Ensure the expected value is in EAX:EDX and that the new
2108       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2109       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2110       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2111       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2112       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2113       __ LockCmpxchg8b(field_addr);
2114     } else {
2115       LOG(FATAL) << "Unexpected CAS type " << type;
2116     }
2117 
2118     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2119     // don't need scheduling barriers at this time.
2120 
2121     // Convert ZF into the Boolean result.
2122     __ setb(kZero, out.AsRegister<Register>());
2123     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2124   }
2125 }
2126 
VisitUnsafeCASInt(HInvoke * invoke)2127 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2128   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2129 }
2130 
VisitUnsafeCASLong(HInvoke * invoke)2131 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2132   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2133 }
2134 
VisitUnsafeCASObject(HInvoke * invoke)2135 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2136   // The only read barrier implementation supporting the
2137   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2138   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2139 
2140   GenCAS(DataType::Type::kReference, invoke, codegen_);
2141 }
2142 
VisitIntegerReverse(HInvoke * invoke)2143 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2144   LocationSummary* locations =
2145       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2146   locations->SetInAt(0, Location::RequiresRegister());
2147   locations->SetOut(Location::SameAsFirstInput());
2148   locations->AddTemp(Location::RequiresRegister());
2149 }
2150 
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2151 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2152                      X86Assembler* assembler) {
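       // Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps adjacent
       // groups of `shift` bits selected by `mask`.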
2153   Immediate imm_shift(shift);
2154   Immediate imm_mask(mask);
2155   __ movl(temp, reg);
2156   __ shrl(reg, imm_shift);
2157   __ andl(temp, imm_mask);
2158   __ andl(reg, imm_mask);
2159   __ shll(temp, imm_shift);
2160   __ orl(reg, temp);
2161 }
2162 
VisitIntegerReverse(HInvoke * invoke)2163 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2164   X86Assembler* assembler = GetAssembler();
2165   LocationSummary* locations = invoke->GetLocations();
2166 
2167   Register reg = locations->InAt(0).AsRegister<Register>();
2168   Register temp = locations->GetTemp(0).AsRegister<Register>();
2169 
2170   /*
2171    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2172    * bit swapping to reverse the bits of x. Using bswap saves instructions compared to
2173    * the generic luni implementation, which needs 5 rounds of bit swapping.
2174    * x = bswap x
2175    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2176    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2177    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2178    */
2179   __ bswapl(reg);
2180   SwapBits(reg, temp, 1, 0x55555555, assembler);
2181   SwapBits(reg, temp, 2, 0x33333333, assembler);
2182   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2183 }
2184 
VisitLongReverse(HInvoke * invoke)2185 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2186   LocationSummary* locations =
2187       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2188   locations->SetInAt(0, Location::RequiresRegister());
2189   locations->SetOut(Location::SameAsFirstInput());
2190   locations->AddTemp(Location::RequiresRegister());
2191 }
2192 
VisitLongReverse(HInvoke * invoke)2193 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2194   X86Assembler* assembler = GetAssembler();
2195   LocationSummary* locations = invoke->GetLocations();
2196 
2197   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2198   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2199   Register temp = locations->GetTemp(0).AsRegister<Register>();
2200 
2201   // We want to swap the high and low words, bswap each one, and then do the same
2202   // bit swapping as a 32-bit reverse.
2203   // Exchange high and low.
2204   __ movl(temp, reg_low);
2205   __ movl(reg_low, reg_high);
2206   __ movl(reg_high, temp);
2207 
2208   // bit-reverse low
2209   __ bswapl(reg_low);
2210   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2211   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2212   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2213 
2214   // bit-reverse high
2215   __ bswapl(reg_high);
2216   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2217   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2218   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2219 }
2220 
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2221 static void CreateBitCountLocations(
2222     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2223   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2224     // Do nothing if there is no popcnt support. This results in generating
2225     // a call for the intrinsic rather than direct code.
2226     return;
2227   }
2228   LocationSummary* locations =
2229       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2230   if (is_long) {
2231     locations->AddTemp(Location::RequiresRegister());
2232   }
2233   locations->SetInAt(0, Location::Any());
2234   locations->SetOut(Location::RequiresRegister());
2235 }
2236 
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2237 static void GenBitCount(X86Assembler* assembler,
2238                         CodeGeneratorX86* codegen,
2239                         HInvoke* invoke, bool is_long) {
2240   LocationSummary* locations = invoke->GetLocations();
2241   Location src = locations->InAt(0);
2242   Register out = locations->Out().AsRegister<Register>();
2243 
2244   if (invoke->InputAt(0)->IsConstant()) {
2245     // Evaluate this at compile time.
2246     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2247     int32_t result = is_long
2248         ? POPCOUNT(static_cast<uint64_t>(value))
2249         : POPCOUNT(static_cast<uint32_t>(value));
2250     codegen->Load32BitValue(out, result);
2251     return;
2252   }
2253 
2254   // Handle the non-constant cases.
2255   if (!is_long) {
2256     if (src.IsRegister()) {
2257       __ popcntl(out, src.AsRegister<Register>());
2258     } else {
2259       DCHECK(src.IsStackSlot());
2260       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2261     }
2262   } else {
2263     // The 64-bit case needs to worry about two parts.
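         // popcount(64-bit value) = popcount(low half) + popcount(high half).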
2264     Register temp = locations->GetTemp(0).AsRegister<Register>();
2265     if (src.IsRegisterPair()) {
2266       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2267       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2268     } else {
2269       DCHECK(src.IsDoubleStackSlot());
2270       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2271       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2272     }
2273     __ addl(out, temp);
2274   }
2275 }
2276 
VisitIntegerBitCount(HInvoke * invoke)2277 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2278   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2279 }
2280 
VisitIntegerBitCount(HInvoke * invoke)2281 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2282   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2283 }
2284 
VisitLongBitCount(HInvoke * invoke)2285 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2286   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2287 }
2288 
VisitLongBitCount(HInvoke * invoke)2289 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2290   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2291 }
2292 
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2293 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2294   LocationSummary* locations =
2295       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2296   if (is_long) {
2297     locations->SetInAt(0, Location::RequiresRegister());
2298   } else {
2299     locations->SetInAt(0, Location::Any());
2300   }
2301   locations->SetOut(Location::RequiresRegister());
2302 }
2303 
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2304 static void GenLeadingZeros(X86Assembler* assembler,
2305                             CodeGeneratorX86* codegen,
2306                             HInvoke* invoke, bool is_long) {
2307   LocationSummary* locations = invoke->GetLocations();
2308   Location src = locations->InAt(0);
2309   Register out = locations->Out().AsRegister<Register>();
2310 
2311   if (invoke->InputAt(0)->IsConstant()) {
2312     // Evaluate this at compile time.
2313     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2314     if (value == 0) {
2315       value = is_long ? 64 : 32;
2316     } else {
2317       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2318     }
2319     codegen->Load32BitValue(out, value);
2320     return;
2321   }
2322 
2323   // Handle the non-constant cases.
2324   if (!is_long) {
2325     if (src.IsRegister()) {
2326       __ bsrl(out, src.AsRegister<Register>());
2327     } else {
2328       DCHECK(src.IsStackSlot());
2329       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2330     }
2331 
2332     // BSR sets ZF if the input was zero, in which case its output is undefined.
2333     NearLabel all_zeroes, done;
2334     __ j(kEqual, &all_zeroes);
2335 
2336     // Correct the result from BSR to get the final CLZ result.
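         // BSR returns the index of the highest set bit; for an index in [0, 31],
         // 31 ^ index == 31 - index, which is the leading-zero count.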
2337     __ xorl(out, Immediate(31));
2338     __ jmp(&done);
2339 
2340     // Fix the zero case with the expected result.
2341     __ Bind(&all_zeroes);
2342     __ movl(out, Immediate(32));
2343 
2344     __ Bind(&done);
2345     return;
2346   }
2347 
2348   // The 64-bit case needs to worry about both halves of the register pair.
2349   DCHECK(src.IsRegisterPair());
2350   Register src_lo = src.AsRegisterPairLow<Register>();
2351   Register src_hi = src.AsRegisterPairHigh<Register>();
2352   NearLabel handle_low, done, all_zeroes;
2353 
2354   // Is the high word zero?
2355   __ testl(src_hi, src_hi);
2356   __ j(kEqual, &handle_low);
2357 
2358   // High word is not zero. We know that the BSR result is defined in this case.
2359   __ bsrl(out, src_hi);
2360 
2361   // Correct the result from BSR to get the final CLZ result.
2362   __ xorl(out, Immediate(31));
2363   __ jmp(&done);
2364 
2365   // High word was zero.  We have to compute the low word count and add 32.
2366   __ Bind(&handle_low);
2367   __ bsrl(out, src_lo);
2368   __ j(kEqual, &all_zeroes);
2369 
2370   // We had a valid result.  Use an XOR to both correct the result and add 32.
2371   __ xorl(out, Immediate(63));
2372   __ jmp(&done);
2373 
2374   // All zero case.
2375   __ Bind(&all_zeroes);
2376   __ movl(out, Immediate(64));
2377 
2378   __ Bind(&done);
2379 }
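
// A rough C++ sketch of the BSR-based computation above, for illustration.
// BSR yields the index of the highest set bit, and for an index in [0, 31]
// `31 - index` equals `index ^ 31`, which is why a single XOR turns the BSR
// result into the leading-zero count (and `index ^ 63` folds in the "+ 32" of
// the high-word-is-zero case). Names below are illustrative only.
//
//   uint32_t Clz32(uint32_t value) {
//     if (value == 0) {
//       return 32;                              // all_zeroes path
//     }
//     uint32_t index = 31;                      // index of the highest set bit (BSR)
//     while (((value >> index) & 1u) == 0) --index;
//     return index ^ 31;                        // == 31 - index
//   }
//
//   uint32_t Clz64(uint64_t value) {
//     uint32_t high = static_cast<uint32_t>(value >> 32);
//     uint32_t low  = static_cast<uint32_t>(value);
//     return (high != 0) ? Clz32(high)          // high word decides
//                        : 32 + Clz32(low);     // == bsr(low) ^ 63 when low != 0
//   }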
2380 
2381 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2382   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2383 }
2384 
2385 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2386   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2387 }
2388 
2389 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2390   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2391 }
2392 
2393 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2394   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2395 }
2396 
2397 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2398   LocationSummary* locations =
2399       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2400   if (is_long) {
2401     locations->SetInAt(0, Location::RequiresRegister());
2402   } else {
2403     locations->SetInAt(0, Location::Any());
2404   }
2405   locations->SetOut(Location::RequiresRegister());
2406 }
2407 
2408 static void GenTrailingZeros(X86Assembler* assembler,
2409                              CodeGeneratorX86* codegen,
2410                              HInvoke* invoke, bool is_long) {
2411   LocationSummary* locations = invoke->GetLocations();
2412   Location src = locations->InAt(0);
2413   Register out = locations->Out().AsRegister<Register>();
2414 
2415   if (invoke->InputAt(0)->IsConstant()) {
2416     // Evaluate this at compile time.
2417     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2418     if (value == 0) {
2419       value = is_long ? 64 : 32;
2420     } else {
2421       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2422     }
2423     codegen->Load32BitValue(out, value);
2424     return;
2425   }
2426 
2427   // Handle the non-constant cases.
2428   if (!is_long) {
2429     if (src.IsRegister()) {
2430       __ bsfl(out, src.AsRegister<Register>());
2431     } else {
2432       DCHECK(src.IsStackSlot());
2433       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2434     }
2435 
2436     // BSF sets ZF if the input was zero, and the output is undefined.
2437     NearLabel done;
2438     __ j(kNotEqual, &done);
2439 
2440     // Fix the zero case with the expected result.
2441     __ movl(out, Immediate(32));
2442 
2443     __ Bind(&done);
2444     return;
2445   }
2446 
2447   // The 64-bit case needs to worry about both halves of the register pair.
2448   DCHECK(src.IsRegisterPair());
2449   Register src_lo = src.AsRegisterPairLow<Register>();
2450   Register src_hi = src.AsRegisterPairHigh<Register>();
2451   NearLabel done, all_zeroes;
2452 
2453   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2454   __ bsfl(out, src_lo);
2455   __ j(kNotEqual, &done);
2456 
2457   // Low word was zero.  We have to compute the high word count and add 32.
2458   __ bsfl(out, src_hi);
2459   __ j(kEqual, &all_zeroes);
2460 
2461   // We had a valid result.  Add 32 to account for the low word being zero.
2462   __ addl(out, Immediate(32));
2463   __ jmp(&done);
2464 
2465   // All zero case.
2466   __ Bind(&all_zeroes);
2467   __ movl(out, Immediate(64));
2468 
2469   __ Bind(&done);
2470 }
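
// A rough C++ sketch of the BSF-based computation above, for illustration.
// BSF returns the index of the lowest set bit, which for a non-zero input is
// exactly the trailing-zero count; only the zero input needs fixing up. The
// sketch relies on compiler builtins rather than BSF itself.
//
//   uint32_t Ctz32(uint32_t value) {
//     return (value == 0) ? 32 : __builtin_ctz(value);
//   }
//
//   uint32_t Ctz64(uint64_t value) {
//     uint32_t low  = static_cast<uint32_t>(value);
//     uint32_t high = static_cast<uint32_t>(value >> 32);
//     if (low != 0) {
//       return __builtin_ctz(low);              // answer is in the low word
//     }
//     if (high != 0) {
//       return 32 + __builtin_ctz(high);        // low word was zero, add 32
//     }
//     return 64;                                // all_zeroes path
//   }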
2471 
2472 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2473   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2474 }
2475 
2476 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2477   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2478 }
2479 
2480 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2481   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2482 }
2483 
2484 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2485   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2486 }
2487 
2488 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2489   return instruction->InputAt(input0) == instruction->InputAt(input1);
2490 }
2491 
2492 // Compute base address for the System.arraycopy intrinsic in `base`.
2493 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2494                                           DataType::Type type,
2495                                           const Register& array,
2496                                           const Location& pos,
2497                                           const Register& base) {
2498   // This routine is only used by the SystemArrayCopy intrinsic at the
2499   // moment, so `type` is restricted to DataType::Type::kReference. Other
2500   // element types could be allowed here to implement the SystemArrayCopyChar intrinsic.
2501   DCHECK_EQ(type, DataType::Type::kReference);
2502   const int32_t element_size = DataType::Size(type);
2503   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2504   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2505 
2506   if (pos.IsConstant()) {
2507     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2508     __ leal(base, Address(array, element_size * constant + data_offset));
2509   } else {
2510     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2511   }
2512 }
2513 
2514 // Compute end source address for the System.arraycopy intrinsic in `end`.
2515 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2516                                          DataType::Type type,
2517                                          const Location& copy_length,
2518                                          const Register& base,
2519                                          const Register& end) {
2520   // This routine is only used by the SystemArrayCopy intrinsic at the
2521   // moment, so `type` is restricted to DataType::Type::kReference. Other
2522   // element types could be allowed here to implement the SystemArrayCopyChar intrinsic.
2523   DCHECK_EQ(type, DataType::Type::kReference);
2524   const int32_t element_size = DataType::Size(type);
2525   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2526 
2527   if (copy_length.IsConstant()) {
2528     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2529     __ leal(end, Address(base, element_size * constant));
2530   } else {
2531     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2532   }
2533 }
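
// For illustration, the two LEA-based helpers above compute the equivalent of
// the plain pointer arithmetic below (element_size == 4 for kReference on x86;
// variable names reuse the parameters above, written out as raw bytes):
//
//   uint8_t* base = reinterpret_cast<uint8_t*>(array) + data_offset + pos * element_size;
//   uint8_t* end  = base + copy_length * element_size;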
2534 
2535 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2536   // The only read barrier implementation supporting the
2537   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2538   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2539     return;
2540   }
2541 
2542   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2543   if (invoke->GetLocations() != nullptr) {
2544     // Need a byte register for marking.
2545     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2546 
2547     static constexpr size_t kSrc = 0;
2548     static constexpr size_t kSrcPos = 1;
2549     static constexpr size_t kDest = 2;
2550     static constexpr size_t kDestPos = 3;
2551     static constexpr size_t kLength = 4;
2552 
2553     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2554         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2555         !invoke->InputAt(kLength)->IsIntConstant()) {
2556       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2557           !IsSameInput(invoke, kSrcPos, kLength) &&
2558           !IsSameInput(invoke, kDestPos, kLength) &&
2559           !IsSameInput(invoke, kSrc, kDest)) {
2560         // Not enough registers, make the length also take a stack slot.
2561         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2562       }
2563     }
2564   }
2565 }
2566 
2567 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2568   // The only read barrier implementation supporting the
2569   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2570   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2571 
2572   X86Assembler* assembler = GetAssembler();
2573   LocationSummary* locations = invoke->GetLocations();
2574 
2575   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2576   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2577   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2578   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2579   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2580 
2581   Register src = locations->InAt(0).AsRegister<Register>();
2582   Location src_pos = locations->InAt(1);
2583   Register dest = locations->InAt(2).AsRegister<Register>();
2584   Location dest_pos = locations->InAt(3);
2585   Location length_arg = locations->InAt(4);
2586   Location length = length_arg;
2587   Location temp1_loc = locations->GetTemp(0);
2588   Register temp1 = temp1_loc.AsRegister<Register>();
2589   Location temp2_loc = locations->GetTemp(1);
2590   Register temp2 = temp2_loc.AsRegister<Register>();
2591 
2592   SlowPathCode* intrinsic_slow_path =
2593       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2594   codegen_->AddSlowPath(intrinsic_slow_path);
2595 
2596   NearLabel conditions_on_positions_validated;
2597   SystemArrayCopyOptimizations optimizations(invoke);
2598 
2599   // If source and destination are the same, we go to the slow path when src_pos < dest_pos,
2600   // as a simple forward copy could then overwrite source elements before reading them.
2601   if (src_pos.IsConstant()) {
2602     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2603     if (dest_pos.IsConstant()) {
2604       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2605       if (optimizations.GetDestinationIsSource()) {
2606         // Checked when building locations.
2607         DCHECK_GE(src_pos_constant, dest_pos_constant);
2608       } else if (src_pos_constant < dest_pos_constant) {
2609         __ cmpl(src, dest);
2610         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2611       }
2612     } else {
2613       if (!optimizations.GetDestinationIsSource()) {
2614         __ cmpl(src, dest);
2615         __ j(kNotEqual, &conditions_on_positions_validated);
2616       }
2617       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2618       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2619     }
2620   } else {
2621     if (!optimizations.GetDestinationIsSource()) {
2622       __ cmpl(src, dest);
2623       __ j(kNotEqual, &conditions_on_positions_validated);
2624     }
2625     if (dest_pos.IsConstant()) {
2626       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2627       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2628       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2629     } else {
2630       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2631       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2632     }
2633   }
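
  // Worked example of the checks above: with src == dest, src_pos == 2,
  // dest_pos == 5 and length == 4, the inline forward copy would perform
  // dest[5] = src[2], ..., dest[8] = src[5], reading src[5] after it has
  // already been overwritten; such overlapping copies are sent to the slow
  // path instead.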
2634 
2635   __ Bind(&conditions_on_positions_validated);
2636 
2637   if (!optimizations.GetSourceIsNotNull()) {
2638     // Bail out if the source is null.
2639     __ testl(src, src);
2640     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2641   }
2642 
2643   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2644     // Bail out if the destination is null.
2645     __ testl(dest, dest);
2646     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2647   }
2648 
2649   Location temp3_loc = locations->GetTemp(2);
2650   Register temp3 = temp3_loc.AsRegister<Register>();
2651   if (length.IsStackSlot()) {
2652     __ movl(temp3, Address(ESP, length.GetStackIndex()));
2653     length = Location::RegisterLocation(temp3);
2654   }
2655 
2656   // If the length is negative, bail out.
2657   // We have already checked in the LocationsBuilder for the constant case.
2658   if (!length.IsConstant() &&
2659       !optimizations.GetCountIsSourceLength() &&
2660       !optimizations.GetCountIsDestinationLength()) {
2661     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2662     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2663   }
2664 
2665   // Validity checks: source.
2666   CheckPosition(assembler,
2667                 src_pos,
2668                 src,
2669                 length,
2670                 intrinsic_slow_path,
2671                 temp1,
2672                 optimizations.GetCountIsSourceLength());
2673 
2674   // Validity checks: dest.
2675   CheckPosition(assembler,
2676                 dest_pos,
2677                 dest,
2678                 length,
2679                 intrinsic_slow_path,
2680                 temp1,
2681                 optimizations.GetCountIsDestinationLength());
2682 
2683   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2684     // Check whether all elements of the source array are assignable to the component
2685     // type of the destination array. We do two checks: the classes are the same,
2686     // or the destination is Object[]. If none of these checks succeed, we go to the
2687     // slow path.
2688 
2689     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2690       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2691         // /* HeapReference<Class> */ temp1 = src->klass_
2692         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2693             invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2694         // Bail out if the source is not a non-primitive array.
2695         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2696         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2697             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2698         __ testl(temp1, temp1);
2699         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2700         // If heap poisoning is enabled, `temp1` has been unpoisoned
2701         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2702       } else {
2703         // /* HeapReference<Class> */ temp1 = src->klass_
2704         __ movl(temp1, Address(src, class_offset));
2705         __ MaybeUnpoisonHeapReference(temp1);
2706         // Bail out if the source is not a non-primitive array.
2707         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2708         __ movl(temp1, Address(temp1, component_offset));
2709         __ testl(temp1, temp1);
2710         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2711         __ MaybeUnpoisonHeapReference(temp1);
2712       }
2713       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2714       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2715     }
2716 
2717     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2718       if (length.Equals(Location::RegisterLocation(temp3))) {
2719         // When Baker read barriers are enabled, register `temp3`,
2720         // which in the present case contains the `length` parameter,
2721         // will be overwritten below.  Make the `length` location
2722         // reference the original stack location; it will be moved
2723         // back to `temp3` later if necessary.
2724         DCHECK(length_arg.IsStackSlot());
2725         length = length_arg;
2726       }
2727 
2728       // /* HeapReference<Class> */ temp1 = dest->klass_
2729       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2730           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2731 
2732       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2733         // Bail out if the destination is not a non-primitive array.
2734         //
2735         // Register `temp1` is not trashed by the read barrier emitted
2736         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2737         // method produces a call to a ReadBarrierMarkRegX entry point,
2738         // which saves all potentially live registers, including
2739         // temporaries such as `temp1`.
2740         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2741         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2742             invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2743         __ testl(temp2, temp2);
2744         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2745         // If heap poisoning is enabled, `temp2` has been unpoisoned
2746         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2747         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2748         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2749       }
2750 
2751       // For the same reason given earlier, `temp1` is not trashed by the
2752       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2753       // /* HeapReference<Class> */ temp2 = src->klass_
2754       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2755           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2756       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2757       __ cmpl(temp1, temp2);
2758 
2759       if (optimizations.GetDestinationIsTypedObjectArray()) {
2760         NearLabel do_copy;
2761         __ j(kEqual, &do_copy);
2762         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2763         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2764             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2765         // We do not need to emit a read barrier for the following
2766         // heap reference load, as `temp1` is only used in a
2767         // comparison with null below, and this reference is not
2768         // kept afterwards.
2769         __ cmpl(Address(temp1, super_offset), Immediate(0));
2770         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2771         __ Bind(&do_copy);
2772       } else {
2773         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2774       }
2775     } else {
2776       // Non read barrier code.
2777 
2778       // /* HeapReference<Class> */ temp1 = dest->klass_
2779       __ movl(temp1, Address(dest, class_offset));
2780       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2781         __ MaybeUnpoisonHeapReference(temp1);
2782         // Bail out if the destination is not a non-primitive array.
2783         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2784         __ movl(temp2, Address(temp1, component_offset));
2785         __ testl(temp2, temp2);
2786         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2787         __ MaybeUnpoisonHeapReference(temp2);
2788         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2789         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2790         // Re-poison the heap reference to make the compare instruction below
2791         // compare two poisoned references.
2792         __ PoisonHeapReference(temp1);
2793       }
2794 
2795       // Note: if heap poisoning is on, we are comparing two poisoned references here.
2796       __ cmpl(temp1, Address(src, class_offset));
2797 
2798       if (optimizations.GetDestinationIsTypedObjectArray()) {
2799         NearLabel do_copy;
2800         __ j(kEqual, &do_copy);
2801         __ MaybeUnpoisonHeapReference(temp1);
2802         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2803         __ movl(temp1, Address(temp1, component_offset));
2804         __ MaybeUnpoisonHeapReference(temp1);
2805         __ cmpl(Address(temp1, super_offset), Immediate(0));
2806         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2807         __ Bind(&do_copy);
2808       } else {
2809         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2810       }
2811     }
2812   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2813     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2814     // Bail out if the source is not a non-primitive array.
2815     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2816       // /* HeapReference<Class> */ temp1 = src->klass_
2817       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2818           invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2819       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2820       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2821           invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2822       __ testl(temp1, temp1);
2823       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2824       // If heap poisoning is enabled, `temp1` has been unpoisoned
2825       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2826     } else {
2827       // /* HeapReference<Class> */ temp1 = src->klass_
2828       __ movl(temp1, Address(src, class_offset));
2829       __ MaybeUnpoisonHeapReference(temp1);
2830       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2831       __ movl(temp1, Address(temp1, component_offset));
2832       __ testl(temp1, temp1);
2833       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2834       __ MaybeUnpoisonHeapReference(temp1);
2835     }
2836     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2837     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2838   }
2839 
2840   const DataType::Type type = DataType::Type::kReference;
2841   const int32_t element_size = DataType::Size(type);
2842 
2843   // Compute the base source address in `temp1`.
2844   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2845 
2846   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2847     // If it is needed (in the case of the fast-path loop), the base
2848     // destination address is computed later, as `temp2` is used for
2849     // intermediate computations.
2850 
2851     // Compute the end source address in `temp3`.
2852     if (length.IsStackSlot()) {
2853       // Location `length` is again pointing at a stack slot, as
2854       // register `temp3` (which previously contained the length parameter)
2855       // has been overwritten; restore it now.
2856       DCHECK(length.Equals(length_arg));
2857       __ movl(temp3, Address(ESP, length.GetStackIndex()));
2858       length = Location::RegisterLocation(temp3);
2859     }
2860     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2861 
2862     // SystemArrayCopy implementation for Baker read barriers (see
2863     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
2864     //
2865     //   if (src_ptr != end_ptr) {
2866     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2867     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2868     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
2869     //     if (is_gray) {
2870     //       // Slow-path copy.
2871     //       for (size_t i = 0; i != length; ++i) {
2872     //         dest_array[dest_pos + i] =
2873     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
2874     //       }
2875     //     } else {
2876     //       // Fast-path copy.
2877     //       do {
2878     //         *dest_ptr++ = *src_ptr++;
2879     //       } while (src_ptr != end_ptr)
2880     //     }
2881     //   }
2882 
2883     NearLabel loop, done;
2884 
2885     // Don't enter copy loop if `length == 0`.
2886     __ cmpl(temp1, temp3);
2887     __ j(kEqual, &done);
2888 
2889     // Given the numeric representation, it's enough to check the low bit of the rb_state.
2890     static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2891     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2892     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
2893     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
2894     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
2895 
2896     // if (rb_state == ReadBarrier::GrayState())
2897     //   goto slow_path;
2898     // At this point, just do the "if" and make sure that flags are preserved until the branch.
2899     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
2900 
2901     // Load fence to prevent load-load reordering.
2902     // Note that this is a no-op, thanks to the x86 memory model.
2903     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2904 
2905     // Slow path used to copy array when `src` is gray.
2906     SlowPathCode* read_barrier_slow_path =
2907         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
2908     codegen_->AddSlowPath(read_barrier_slow_path);
2909 
2910     // We have done the "if" of the gray bit check above, now branch based on the flags.
2911     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
2912 
2913     // Fast-path copy.
2914     // Compute the base destination address in `temp2`.
2915     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2916     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2917     // poison/unpoison.
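    // Each iteration moves one 4-byte heap reference directly from the source
    // to the destination through the stack (push from `temp1`, pop into
    // `temp2`), so no extra register is needed for the transfer; the CFA
    // adjustments keep the unwind information correct while ESP is
    // temporarily moved.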
2918     __ Bind(&loop);
2919     __ pushl(Address(temp1, 0));
2920     __ cfi().AdjustCFAOffset(4);
2921     __ popl(Address(temp2, 0));
2922     __ cfi().AdjustCFAOffset(-4);
2923     __ addl(temp1, Immediate(element_size));
2924     __ addl(temp2, Immediate(element_size));
2925     __ cmpl(temp1, temp3);
2926     __ j(kNotEqual, &loop);
2927 
2928     __ Bind(read_barrier_slow_path->GetExitLabel());
2929     __ Bind(&done);
2930   } else {
2931     // Non read barrier code.
2932     // Compute the base destination address in `temp2`.
2933     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2934     // Compute the end source address in `temp3`.
2935     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2936     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2937     // poison/unpoison.
2938     NearLabel loop, done;
2939     __ cmpl(temp1, temp3);
2940     __ j(kEqual, &done);
2941     __ Bind(&loop);
2942     __ pushl(Address(temp1, 0));
2943     __ cfi().AdjustCFAOffset(4);
2944     __ popl(Address(temp2, 0));
2945     __ cfi().AdjustCFAOffset(-4);
2946     __ addl(temp1, Immediate(element_size));
2947     __ addl(temp2, Immediate(element_size));
2948     __ cmpl(temp1, temp3);
2949     __ j(kNotEqual, &loop);
2950     __ Bind(&done);
2951   }
2952 
2953   // We only need one card marking on the destination array.
2954   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
2955 
2956   __ Bind(intrinsic_slow_path->GetExitLabel());
2957 }
2958 
2959 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
2960   DCHECK(invoke->IsInvokeStaticOrDirect());
2961   InvokeRuntimeCallingConvention calling_convention;
2962   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2963       invoke,
2964       codegen_,
2965       Location::RegisterLocation(EAX),
2966       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2967 
2968   LocationSummary* locations = invoke->GetLocations();
2969   if (locations != nullptr) {
2970     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
2971     if (invoke_static_or_direct->HasSpecialInput() &&
2972         invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
2973             ->IsX86ComputeBaseMethodAddress()) {
2974       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
2975                          Location::RequiresRegister());
2976     }
2977   }
2978 }
2979 
2980 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
2981   DCHECK(invoke->IsInvokeStaticOrDirect());
2982   IntrinsicVisitor::IntegerValueOfInfo info =
2983       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2984   LocationSummary* locations = invoke->GetLocations();
2985   X86Assembler* assembler = GetAssembler();
2986 
2987   Register out = locations->Out().AsRegister<Register>();
2988   InvokeRuntimeCallingConvention calling_convention;
2989   if (invoke->InputAt(0)->IsConstant()) {
2990     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2991     if (static_cast<uint32_t>(value - info.low) < info.length) {
2992       // Just embed the j.l.Integer in the code.
2993       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2994       codegen_->LoadBootImageAddress(
2995           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
2996     } else {
2997       DCHECK(locations->CanCall());
2998       // Allocate and initialize a new j.l.Integer.
2999       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3000       // JIT object table.
3001       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3002                                              info.integer_boot_image_offset);
3003       __ movl(Address(out, info.value_offset), Immediate(value));
3004     }
3005   } else {
3006     DCHECK(locations->CanCall());
3007     Register in = locations->InAt(0).AsRegister<Register>();
3008     // Check bounds of our cache.
3009     __ leal(out, Address(in, -info.low));
3010     __ cmpl(out, Immediate(info.length));
3011     NearLabel allocate, done;
3012     __ j(kAboveEqual, &allocate);
3013     // If the value is within the bounds, load the j.l.Integer directly from the array.
3014     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3015     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3016                   "Check heap reference size.");
3017     if (codegen_->GetCompilerOptions().IsBootImage()) {
3018       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3019       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3020       HX86ComputeBaseMethodAddress* method_address =
3021           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3022       DCHECK(method_address != nullptr);
3023       Register method_address_reg =
3024           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3025       __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset));
3026       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3027     } else {
3028       // Note: We're about to clobber the index in `out`, so we need to use `in` and
3029       // adjust the offset accordingly.
3030       uint32_t mid_array_boot_image_offset =
3031               info.array_data_boot_image_reference - info.low * kElementSize;
3032       codegen_->LoadBootImageAddress(
3033           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3034       DCHECK_NE(out, in);
3035       __ movl(out, Address(out, in, TIMES_4, 0));
3036     }
3037     __ MaybeUnpoisonHeapReference(out);
3038     __ jmp(&done);
3039     __ Bind(&allocate);
3040     // Otherwise allocate and initialize a new j.l.Integer.
3041     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
3042                                            info.integer_boot_image_offset);
3043     __ movl(Address(out, info.value_offset), in);
3044     __ Bind(&done);
3045   }
3046 }
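
// A rough sketch of the caching behavior generated above, assuming a
// hypothetical `cache` array standing in for the boot image IntegerCache data
// and an `AllocateBoxedInteger()` helper standing in for the runtime
// allocation; both names are illustrative only.
//
//   mirror::Object* IntegerValueOf(int32_t value) {
//     if (static_cast<uint32_t>(value - info.low) < info.length) {
//       return cache[value - info.low];          // reuse the cached j.l.Integer
//     }
//     mirror::Object* boxed = AllocateBoxedInteger();
//     // Store `value` into the Integer.value field, as the movl above does.
//     *reinterpret_cast<int32_t*>(
//         reinterpret_cast<uint8_t*>(boxed) + info.value_offset) = value;
//     return boxed;
//   }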
3047 
3048 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3049   LocationSummary* locations =
3050       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3051   locations->SetOut(Location::RequiresRegister());
3052 }
3053 
3054 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3055   X86Assembler* assembler = GetAssembler();
3056   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3057   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3058   NearLabel done;
3059   __ fs()->movl(out, address);
3060   __ testl(out, out);
3061   __ j(kEqual, &done);
3062   __ fs()->movl(address, Immediate(0));
3063   codegen_->MemoryFence();
3064   __ Bind(&done);
3065 }
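
// A rough sketch of the semantics generated above (field name illustrative):
// read the thread-local interrupted flag through fs:, and only when it was set
// clear it and issue a fence so the clear is not reordered with later accesses.
//
//   bool ThreadInterrupted() {
//     int32_t out = self->interrupted;           // fs()->movl(out, address)
//     if (out != 0) {
//       self->interrupted = 0;                   // fs()->movl(address, Immediate(0))
//       std::atomic_thread_fence(std::memory_order_seq_cst);  // MemoryFence()
//     }
//     return out != 0;
//   }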
3066 
3067 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3068   LocationSummary* locations =
3069       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3070   locations->SetInAt(0, Location::Any());
3071 }
3072 
3073 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3074 
3075 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
3076 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
3077 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
3078 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
3079 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
3080 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
3081 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
3082 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
3083 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
3084 UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
3085 UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
3086 UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
3087 UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
3088 UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
3089 UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
3090 UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
3091 UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
3092 UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
3093 
3094 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
3095 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
3096 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
3097 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
3098 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
3099 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
3100 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
3101 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
3102 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
3103 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
3104 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
3105 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
3106 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
3107 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
3108 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
3109 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
3110 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
3111 
3112 // 1.8.
3113 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
3114 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
3115 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
3116 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
3117 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
3118 
3119 UNREACHABLE_INTRINSICS(X86)
3120 
3121 #undef __
3122 
3123 }  // namespace x86
3124 }  // namespace art
3125