/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : allocator_(codegen->GetGraph()->GetAllocator()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2`, and `temp3`
    // (resp.) are not used for the base source address, the base
    // destination address, and the end source address (resp.), as in
    // other SystemArrayCopy intrinsic code paths.  Instead they are
    // (resp.) used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).

    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
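    // The mark entrypoint is selected by register number: the reference to
    // mark is passed in `temp2` and the marked reference comes back in the
    // same register.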
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
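    // Shift the upper 32 bits of the double down into the low lane so the
    // second movd can extract them into the high register of the pair.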
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
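    // punpckldq interleaves the low doublewords: temp1 = (high << 32) | low,
    // reassembling the 64-bit bit pattern before it is moved to the output.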
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
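      // The swapped halfword now sits in the upper 16 bits; the arithmetic
      // shift brings it back down and sign-extends the result.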
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
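  // The roundsd immediate selects the rounding mode: 0 rounds to nearest
  // (even), 1 rounds toward negative infinity (floor), 2 rounds toward
  // positive infinity (ceil).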
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(
          static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(1, Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
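    // After the two pushes, 0.5f sits at ESP + 4 and 1.0f at ESP + 0.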
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
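  // The native x86-32 ABI returns the double in st(0), so store it to memory
  // with fstpl and reload it into XMM0 where the compiler expects the result.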
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenLowestOneBit(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            bool is_long,
                            HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Location out_loc = locations->Out();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      if (is_long) {
        __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
        __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
      } else {
        __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
      }
      return;
    }
    // Nonzero value.
    value = is_long ? CTZ(static_cast<uint64_t>(value))
                    : CTZ(static_cast<uint32_t>(value));
    if (is_long) {
      if (value >= 32) {
        int shift = value - 32;
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
      } else {
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
      }
    } else {
      codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
    }
    return;
  }
  // Handle the non-constant case.
  if (is_long) {
    DCHECK(src.IsRegisterPair());
    Register src_lo = src.AsRegisterPairLow<Register>();
    Register src_hi = src.AsRegisterPairHigh<Register>();

    Register out_lo = out_loc.AsRegisterPairLow<Register>();
    Register out_hi = out_loc.AsRegisterPairHigh<Register>();

    __ movl(out_lo, src_lo);
    __ movl(out_hi, src_hi);

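    // Negate the 64-bit pair in place: negl sets the carry when the low word
    // is non-zero, and adcl/negl then compute -(high + carry). After the andl
    // below, out = src & -src, which isolates the lowest set bit.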
    __ negl(out_lo);
    __ adcl(out_hi, Immediate(0));
    __ negl(out_hi);

    __ andl(out_lo, src_lo);
    __ andl(out_hi, src_hi);
  } else {
    if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
      Register out = out_loc.AsRegister<Register>();
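      // blsi computes out = src & -src in a single instruction.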
      __ blsi(out, src.AsRegister<Register>());
    } else {
      Register out = out_loc.AsRegister<Register>();
      // Do tmp & -tmp
      if (src.IsRegister()) {
        __ movl(out, src.AsRegister<Register>());
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(out, Address(ESP, src.GetStackIndex()));
      }
      __ negl(out);

      if (src.IsRegister()) {
        __ andl(out, src.AsRegister<Register>());
      } else {
        __ andl(out, Address(ESP, src.GetStackIndex()));
      }
    }
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
}
void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
}

void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

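// Checks that `pos` is non-negative and within bounds of `input`, and that
// `length` elements starting at `pos` fit inside the array; jumps to
// `slow_path` otherwise. When `length_is_input_length` is true, the copy can
// only succeed when `pos` is zero.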
static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
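  // rep movsw copies ECX 16-bit units from [ESI] (src_base) to [EDI] (dest_base).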
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ ecx = str->klass_
    __ movl(ecx, Address(str, class_offset));
    // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
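  // With string compression enabled, the count field encodes
  // (length << 1) | flag, where the low bit is 1 for uncompressed (UTF-16) data.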
1050   __ movl(ecx, Address(str, count_offset));
1051   // Check if lengths and compression flags are equal, return false if they're not.
1052   // Two identical strings will always have same compression style since
1053   // compression style is decided on alloc.
1054   __ cmpl(ecx, Address(arg, count_offset));
1055   __ j(kNotEqual, &return_false);
1056   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1057   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1058                 "Expecting 0=compressed, 1=uncompressed");
1059   __ jecxz(&return_true);
1060 
1061   if (mirror::kUseStringCompression) {
1062     NearLabel string_uncompressed;
1063     // Extract length and differentiate between both compressed or both uncompressed.
1064     // Different compression style is cut above.
1065     __ shrl(ecx, Immediate(1));
1066     __ j(kCarrySet, &string_uncompressed);
1067     // Divide string length by 2, rounding up, and continue as if uncompressed.
1068     __ addl(ecx, Immediate(1));
1069     __ shrl(ecx, Immediate(1));
1070     __ Bind(&string_uncompressed);
1071   }
1072   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1073   __ leal(esi, Address(str, value_offset));
1074   __ leal(edi, Address(arg, value_offset));
1075 
1076   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1077   // divisible by 2.
1078   __ addl(ecx, Immediate(1));
1079   __ shrl(ecx, Immediate(1));
1080 
1081   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1082   // or 4 characters (compressed) at a time.
1083   DCHECK_ALIGNED(value_offset, 4);
1084   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1085 
1086   // Loop to compare strings two characters at a time starting at the beginning of the string.
1087   __ repe_cmpsl();
1088   // If strings are not equal, zero flag will be cleared.
1089   __ j(kNotEqual, &return_false);
1090 
1091   // Return true and exit the function.
1092   // If loop does not result in returning false, we return true.
1093   __ Bind(&return_true);
1094   __ movl(esi, Immediate(1));
1095   __ jmp(&end);
1096 
1097   // Return false and exit the function.
1098   __ Bind(&return_false);
1099   __ xorl(esi, esi);
1100   __ Bind(&end);
1101 }
1102 
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1103 static void CreateStringIndexOfLocations(HInvoke* invoke,
1104                                          ArenaAllocator* allocator,
1105                                          bool start_at_zero) {
1106   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1107                                                                LocationSummary::kCallOnSlowPath,
1108                                                                kIntrinsified);
1109   // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1110   locations->SetInAt(0, Location::RegisterLocation(EDI));
1111   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1112   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1113   // of the instruction explicitly.
1114   // Note: This works as we don't clobber EAX anywhere.
1115   locations->SetInAt(1, Location::RegisterLocation(EAX));
1116   if (!start_at_zero) {
1117     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1118   }
1119   // As we clobber EDI during execution anyways, also use it as the output.
1120   locations->SetOut(Location::SameAsFirstInput());
1121 
1122   // repne scasw uses ECX as the counter.
1123   locations->AddTemp(Location::RegisterLocation(ECX));
1124   // Need another temporary to be able to compute the result.
1125   locations->AddTemp(Location::RequiresRegister());
1126   if (mirror::kUseStringCompression) {
1127     // Need another temporary to be able to save unflagged string length.
1128     locations->AddTemp(Location::RequiresRegister());
1129   }
1130 }
1131 
GenerateStringIndexOf(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,bool start_at_zero)1132 static void GenerateStringIndexOf(HInvoke* invoke,
1133                                   X86Assembler* assembler,
1134                                   CodeGeneratorX86* codegen,
1135                                   bool start_at_zero) {
1136   LocationSummary* locations = invoke->GetLocations();
1137 
1138   // Note that the null check must have been done earlier.
1139   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1140 
1141   Register string_obj = locations->InAt(0).AsRegister<Register>();
1142   Register search_value = locations->InAt(1).AsRegister<Register>();
1143   Register counter = locations->GetTemp(0).AsRegister<Register>();
1144   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1145   Register out = locations->Out().AsRegister<Register>();
1146   // Only used when string compression feature is on.
1147   Register string_length_flagged;
1148 
1149   // Check our assumptions for registers.
1150   DCHECK_EQ(string_obj, EDI);
1151   DCHECK_EQ(search_value, EAX);
1152   DCHECK_EQ(counter, ECX);
1153   DCHECK_EQ(out, EDI);
1154 
1155   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1156   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1157   SlowPathCode* slow_path = nullptr;
1158   HInstruction* code_point = invoke->InputAt(1);
1159   if (code_point->IsIntConstant()) {
1160     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1161     std::numeric_limits<uint16_t>::max()) {
1162       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1163       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1164       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1165       codegen->AddSlowPath(slow_path);
1166       __ jmp(slow_path->GetEntryLabel());
1167       __ Bind(slow_path->GetExitLabel());
1168       return;
1169     }
1170   } else if (code_point->GetType() != DataType::Type::kUint16) {
1171     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1172     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1173     codegen->AddSlowPath(slow_path);
1174     __ j(kAbove, slow_path->GetEntryLabel());
1175   }
1176 
1177   // From here down, we know that we are looking for a char that fits in 16 bits.
1178   // Location of reference to data array within the String object.
1179   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1180   // Location of count within the String object.
1181   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1182 
1183   // Load the count field of the string containing the length and compression flag.
1184   __ movl(string_length, Address(string_obj, count_offset));
1185 
1186   // Do a zero-length check. Even with string compression `count == 0` means empty.
1187   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1188                 "Expecting 0=compressed, 1=uncompressed");
1189   // TODO: Support jecxz.
1190   NearLabel not_found_label;
1191   __ testl(string_length, string_length);
1192   __ j(kEqual, &not_found_label);
1193 
1194   if (mirror::kUseStringCompression) {
1195     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1196     __ movl(string_length_flagged, string_length);
1197     // Extract the length and shift out the least significant bit used as compression flag.
1198     __ shrl(string_length, Immediate(1));
1199   }
1200 
1201   if (start_at_zero) {
1202     // Number of chars to scan is the same as the string length.
1203     __ movl(counter, string_length);
1204 
1205     // Move to the start of the string.
1206     __ addl(string_obj, Immediate(value_offset));
1207   } else {
1208     Register start_index = locations->InAt(2).AsRegister<Register>();
1209 
1210     // Do a start_index check.
1211     __ cmpl(start_index, string_length);
1212     __ j(kGreaterEqual, &not_found_label);
1213 
1214     // Ensure we have a start index >= 0;
1215     __ xorl(counter, counter);
1216     __ cmpl(start_index, Immediate(0));
1217     __ cmovl(kGreater, counter, start_index);
1218 
1219     if (mirror::kUseStringCompression) {
1220       NearLabel modify_counter, offset_uncompressed_label;
1221       __ testl(string_length_flagged, Immediate(1));
1222       __ j(kNotZero, &offset_uncompressed_label);
1223       // Move to the start of the string: string_obj + value_offset + start_index.
1224       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1225       __ jmp(&modify_counter);
1226 
1227       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1228       __ Bind(&offset_uncompressed_label);
1229       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1230 
1231       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1232       // compare.
1233       __ Bind(&modify_counter);
1234     } else {
1235       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1236     }
1237     __ negl(counter);
1238     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
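    // `counter` held max(start_index, 0) here; negating it and adding `string_length` leaves
    // string_length - start_index, the number of characters still to scan.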
1239   }
1240 
1241   if (mirror::kUseStringCompression) {
1242     NearLabel uncompressed_string_comparison;
1243     NearLabel comparison_done;
1244     __ testl(string_length_flagged, Immediate(1));
1245     __ j(kNotZero, &uncompressed_string_comparison);
1246 
1247     // Check if EAX (search_value) is ASCII.
1248     __ cmpl(search_value, Immediate(127));
1249     __ j(kGreater, &not_found_label);
1250     // Comparing byte-per-byte.
1251     __ repne_scasb();
1252     __ jmp(&comparison_done);
1253 
1254     // Everything is set up for repne scasw:
1255     //   * Comparison address in EDI.
1256     //   * Counter in ECX.
1257     __ Bind(&uncompressed_string_comparison);
1258     __ repne_scasw();
1259     __ Bind(&comparison_done);
1260   } else {
1261     __ repne_scasw();
1262   }
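  // repne scasb/scasw compares AL/AX with the byte/word at [EDI], advancing EDI and decrementing
  // ECX until a match is found or ECX reaches zero; ZF is set only when a match was found.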
1263   // Did we find a match?
1264   __ j(kNotEqual, &not_found_label);
1265 
1266   // Yes, we matched.  Compute the index of the result.
1267   __ subl(string_length, counter);
1268   __ leal(out, Address(string_length, -1));
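  // ECX was also decremented for the matching character, so string_length - ECX is one past its
  // index; the -1 displacement above corrects for that.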
1269 
1270   NearLabel done;
1271   __ jmp(&done);
1272 
1273   // Failed to match; return -1.
1274   __ Bind(&not_found_label);
1275   __ movl(out, Immediate(-1));
1276 
1277   // And join up at the end.
1278   __ Bind(&done);
1279   if (slow_path != nullptr) {
1280     __ Bind(slow_path->GetExitLabel());
1281   }
1282 }
1283 
1284 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1285   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1286 }
1287 
1288 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1289   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1290 }
1291 
1292 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1293   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1294 }
1295 
1296 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1297   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1298 }
1299 
1300 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1301   LocationSummary* locations = new (allocator_) LocationSummary(
1302       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1303   InvokeRuntimeCallingConvention calling_convention;
1304   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1305   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1306   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1307   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1308   locations->SetOut(Location::RegisterLocation(EAX));
1309 }
1310 
1311 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1312   X86Assembler* assembler = GetAssembler();
1313   LocationSummary* locations = invoke->GetLocations();
1314 
1315   Register byte_array = locations->InAt(0).AsRegister<Register>();
1316   __ testl(byte_array, byte_array);
1317   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1318   codegen_->AddSlowPath(slow_path);
1319   __ j(kEqual, slow_path->GetEntryLabel());
1320 
1321   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1322   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1323   __ Bind(slow_path->GetExitLabel());
1324 }
1325 
1326 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1327   LocationSummary* locations =
1328       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1329   InvokeRuntimeCallingConvention calling_convention;
1330   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1331   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1332   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1333   locations->SetOut(Location::RegisterLocation(EAX));
1334 }
1335 
1336 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1337   // No need to emit code checking whether `locations->InAt(2)` is a null
1338   // pointer, as callers of the native method
1339   //
1340   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1341   //
1342   // all include a null check on `data` before calling that method.
1343   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1344   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1345 }
1346 
1347 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1348   LocationSummary* locations = new (allocator_) LocationSummary(
1349       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1350   InvokeRuntimeCallingConvention calling_convention;
1351   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1352   locations->SetOut(Location::RegisterLocation(EAX));
1353 }
1354 
1355 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1356   X86Assembler* assembler = GetAssembler();
1357   LocationSummary* locations = invoke->GetLocations();
1358 
1359   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1360   __ testl(string_to_copy, string_to_copy);
1361   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1362   codegen_->AddSlowPath(slow_path);
1363   __ j(kEqual, slow_path->GetEntryLabel());
1364 
1365   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1366   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1367   __ Bind(slow_path->GetExitLabel());
1368 }
1369 
1370 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1371   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1372   LocationSummary* locations =
1373       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1374   locations->SetInAt(0, Location::RequiresRegister());
1375   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1376   // Place srcEnd in ECX to save a move below.
1377   locations->SetInAt(2, Location::RegisterLocation(ECX));
1378   locations->SetInAt(3, Location::RequiresRegister());
1379   locations->SetInAt(4, Location::RequiresRegister());
1380 
1381   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1382   // We don't have enough registers to also reserve ECX as a temp, so it is handled below
1383   // by saving and restoring it in the code generator.
1383   locations->AddTemp(Location::RegisterLocation(ESI));
1384   locations->AddTemp(Location::RegisterLocation(EDI));
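  // REP MOVSW implicitly takes its source in ESI, its destination in EDI and its element count
  // in ECX, which is why these registers are fixed here.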
1385 }
1386 
1387 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1388   X86Assembler* assembler = GetAssembler();
1389   LocationSummary* locations = invoke->GetLocations();
1390 
1391   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1392   // Location of data in char array buffer.
1393   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1394   // Location of char array data in string.
1395   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1396 
1397   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1398   Register obj = locations->InAt(0).AsRegister<Register>();
1399   Location srcBegin = locations->InAt(1);
1400   int srcBegin_value =
1401     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1402   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1403   Register dst = locations->InAt(3).AsRegister<Register>();
1404   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1405 
1406   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1407   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1408   DCHECK_EQ(char_size, 2u);
1409 
1410   // Compute the number of chars (words) to move.
1411   // Save ECX, since we don't know if it will be used later.
1412   __ pushl(ECX);
1413   int stack_adjust = kX86WordSize;
1414   __ cfi().AdjustCFAOffset(stack_adjust);
1415   DCHECK_EQ(srcEnd, ECX);
1416   if (srcBegin.IsConstant()) {
1417     __ subl(ECX, Immediate(srcBegin_value));
1418   } else {
1419     DCHECK(srcBegin.IsRegister());
1420     __ subl(ECX, srcBegin.AsRegister<Register>());
1421   }
1422 
1423   NearLabel done;
1424   if (mirror::kUseStringCompression) {
1425     // Location of count in string
1426     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1427     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1428     DCHECK_EQ(c_char_size, 1u);
1429     __ pushl(EAX);
1430     __ cfi().AdjustCFAOffset(stack_adjust);
1431 
1432     NearLabel copy_loop, copy_uncompressed;
1433     __ testl(Address(obj, count_offset), Immediate(1));
1434     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1435                   "Expecting 0=compressed, 1=uncompressed");
1436     __ j(kNotZero, &copy_uncompressed);
1437     // Compute the address of the source string by adding the number of chars from
1438     // the source beginning to the value offset of a string.
1439     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1440 
1441     // Start the loop to copy String's value to Array of Char.
1442     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1443     __ Bind(&copy_loop);
1444     __ jecxz(&done);
1445     // Use EAX temporary (convert byte from ESI to word).
1446     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1447     __ movzxb(EAX, Address(ESI, 0));
1448     __ movw(Address(EDI, 0), EAX);
1449     __ leal(EDI, Address(EDI, char_size));
1450     __ leal(ESI, Address(ESI, c_char_size));
1451     // TODO: Add support for LOOP to X86Assembler.
1452     __ subl(ECX, Immediate(1));
1453     __ jmp(&copy_loop);
1454     __ Bind(&copy_uncompressed);
1455   }
1456 
1457   // Do the copy for uncompressed string.
1458   // Compute the address of the destination buffer.
1459   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1460   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
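  // rep movsw copies ECX 16-bit code units from [ESI] to [EDI], advancing both pointers;
  // ECX was set to srcEnd - srcBegin above.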
1461   __ rep_movsw();
1462 
1463   __ Bind(&done);
1464   if (mirror::kUseStringCompression) {
1465     // Restore EAX.
1466     __ popl(EAX);
1467     __ cfi().AdjustCFAOffset(-stack_adjust);
1468   }
1469   // Restore ECX.
1470   __ popl(ECX);
1471   __ cfi().AdjustCFAOffset(-stack_adjust);
1472 }
1473 
1474 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1475   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1476   Location out_loc = locations->Out();
1477   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1478   // to avoid a SIGBUS.
1479   switch (size) {
1480     case DataType::Type::kInt8:
1481       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1482       break;
1483     case DataType::Type::kInt16:
1484       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1485       break;
1486     case DataType::Type::kInt32:
1487       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1488       break;
1489     case DataType::Type::kInt64:
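      // The 64-bit peek is emitted as two 32-bit loads; this is not a single atomic 64-bit access.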
1490       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1491       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1492       break;
1493     default:
1494       LOG(FATAL) << "Type not recognized for peek: " << size;
1495       UNREACHABLE();
1496   }
1497 }
1498 
1499 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1500   CreateLongToIntLocations(allocator_, invoke);
1501 }
1502 
1503 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1504   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1505 }
1506 
1507 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1508   CreateLongToIntLocations(allocator_, invoke);
1509 }
1510 
1511 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1512   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1513 }
1514 
1515 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1516   CreateLongToLongLocations(allocator_, invoke);
1517 }
1518 
1519 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1520   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1521 }
1522 
1523 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1524   CreateLongToIntLocations(allocator_, invoke);
1525 }
1526 
1527 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1528   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1529 }
1530 
1531 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1532                                          DataType::Type size,
1533                                          HInvoke* invoke) {
1534   LocationSummary* locations =
1535       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1536   locations->SetInAt(0, Location::RequiresRegister());
1537   HInstruction* value = invoke->InputAt(1);
1538   if (size == DataType::Type::kInt8) {
1539     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1540   } else {
1541     locations->SetInAt(1, Location::RegisterOrConstant(value));
1542   }
1543 }
1544 
1545 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1546   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1547   Location value_loc = locations->InAt(1);
1548   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1549   // to avoid a SIGBUS.
1550   switch (size) {
1551     case DataType::Type::kInt8:
1552       if (value_loc.IsConstant()) {
1553         __ movb(Address(address, 0),
1554                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1555       } else {
1556         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1557       }
1558       break;
1559     case DataType::Type::kInt16:
1560       if (value_loc.IsConstant()) {
1561         __ movw(Address(address, 0),
1562                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1563       } else {
1564         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1565       }
1566       break;
1567     case DataType::Type::kInt32:
1568       if (value_loc.IsConstant()) {
1569         __ movl(Address(address, 0),
1570                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1571       } else {
1572         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1573       }
1574       break;
1575     case DataType::Type::kInt64:
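      // As with the peek above, the 64-bit poke is emitted as two 32-bit stores rather than one
      // atomic 64-bit store.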
1576       if (value_loc.IsConstant()) {
1577         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1578         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1579         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1580       } else {
1581         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1582         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1583       }
1584       break;
1585     default:
1586       LOG(FATAL) << "Type not recognized for poke: " << size;
1587       UNREACHABLE();
1588   }
1589 }
1590 
1591 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1592   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1593 }
1594 
1595 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1596   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1597 }
1598 
1599 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1600   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1601 }
1602 
1603 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1604   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1605 }
1606 
1607 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1608   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1609 }
1610 
1611 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1612   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1613 }
1614 
1615 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1616   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1617 }
1618 
1619 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1620   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1621 }
1622 
1623 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1624   LocationSummary* locations =
1625       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1626   locations->SetOut(Location::RequiresRegister());
1627 }
1628 
1629 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1630   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1631   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1632 }
1633 
1634 static void GenUnsafeGet(HInvoke* invoke,
1635                          DataType::Type type,
1636                          bool is_volatile,
1637                          CodeGeneratorX86* codegen) {
1638   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1639   LocationSummary* locations = invoke->GetLocations();
1640   Location base_loc = locations->InAt(1);
1641   Register base = base_loc.AsRegister<Register>();
1642   Location offset_loc = locations->InAt(2);
1643   Register offset = offset_loc.AsRegisterPairLow<Register>();
1644   Location output_loc = locations->Out();
1645 
1646   switch (type) {
1647     case DataType::Type::kInt32: {
1648       Register output = output_loc.AsRegister<Register>();
1649       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1650       break;
1651     }
1652 
1653     case DataType::Type::kReference: {
1654       Register output = output_loc.AsRegister<Register>();
1655       if (kEmitCompilerReadBarrier) {
1656         if (kUseBakerReadBarrier) {
1657           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1658           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1659               invoke, output_loc, base, src, /* needs_null_check= */ false);
1660         } else {
1661           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1662           codegen->GenerateReadBarrierSlow(
1663               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1664         }
1665       } else {
1666         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1667         __ MaybeUnpoisonHeapReference(output);
1668       }
1669       break;
1670     }
1671 
1672     case DataType::Type::kInt64: {
1673         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1674         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1675         if (is_volatile) {
1676           // Need to use an XMM register to read the 64 bits atomically.
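          // A single 8-byte movsd load is atomic on x86 (assuming the usual 8-byte field
          // alignment); the movd/psrlq sequence below just splits the value into the output
          // register pair.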
1677           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1678           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1679           __ movd(output_lo, temp);
1680           __ psrlq(temp, Immediate(32));
1681           __ movd(output_hi, temp);
1682         } else {
1683           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1684           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1685         }
1686       }
1687       break;
1688 
1689     default:
1690       LOG(FATAL) << "Unsupported op size " << type;
1691       UNREACHABLE();
1692   }
1693 }
1694 
1695 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1696                                           HInvoke* invoke,
1697                                           DataType::Type type,
1698                                           bool is_volatile) {
1699   bool can_call = kEmitCompilerReadBarrier &&
1700       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1701        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1702   LocationSummary* locations =
1703       new (allocator) LocationSummary(invoke,
1704                                       can_call
1705                                           ? LocationSummary::kCallOnSlowPath
1706                                           : LocationSummary::kNoCall,
1707                                       kIntrinsified);
1708   if (can_call && kUseBakerReadBarrier) {
1709     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1710   }
1711   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1712   locations->SetInAt(1, Location::RequiresRegister());
1713   locations->SetInAt(2, Location::RequiresRegister());
1714   if (type == DataType::Type::kInt64) {
1715     if (is_volatile) {
1716       // Need an XMM register to read the volatile 64-bit value atomically.
1717       locations->AddTemp(Location::RequiresFpuRegister());
1718       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1719     } else {
1720       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1721     }
1722   } else {
1723     locations->SetOut(Location::RequiresRegister(),
1724                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1725   }
1726 }
1727 
1728 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1729   CreateIntIntIntToIntLocations(
1730       allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1731 }
1732 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1733   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1734 }
1735 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1736   CreateIntIntIntToIntLocations(
1737       allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1738 }
1739 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1740   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1741 }
1742 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1743   CreateIntIntIntToIntLocations(
1744       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1745 }
1746 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1747   CreateIntIntIntToIntLocations(
1748       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1749 }
1750 
1751 
1752 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1753   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1754 }
1755 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1756   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1757 }
1758 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1759   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1760 }
1761 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1762   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1763 }
1764 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1765   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1766 }
1767 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1768   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1769 }
1770 
1771 
1772 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1773                                                        DataType::Type type,
1774                                                        HInvoke* invoke,
1775                                                        bool is_volatile) {
1776   LocationSummary* locations =
1777       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1778   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1779   locations->SetInAt(1, Location::RequiresRegister());
1780   locations->SetInAt(2, Location::RequiresRegister());
1781   locations->SetInAt(3, Location::RequiresRegister());
1782   if (type == DataType::Type::kReference) {
1783     // Need temp registers for card-marking.
1784     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1785     // Ensure the value is in a byte register.
1786     locations->AddTemp(Location::RegisterLocation(ECX));
1787   } else if (type == DataType::Type::kInt64 && is_volatile) {
1788     locations->AddTemp(Location::RequiresFpuRegister());
1789     locations->AddTemp(Location::RequiresFpuRegister());
1790   }
1791 }
1792 
1793 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1794   CreateIntIntIntIntToVoidPlusTempsLocations(
1795       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1796 }
1797 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1798   CreateIntIntIntIntToVoidPlusTempsLocations(
1799       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1800 }
1801 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1802   CreateIntIntIntIntToVoidPlusTempsLocations(
1803       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1804 }
1805 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1806   CreateIntIntIntIntToVoidPlusTempsLocations(
1807       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1808 }
1809 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1810   CreateIntIntIntIntToVoidPlusTempsLocations(
1811       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1812 }
1813 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1814   CreateIntIntIntIntToVoidPlusTempsLocations(
1815       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1816 }
1817 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1818   CreateIntIntIntIntToVoidPlusTempsLocations(
1819       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1820 }
1821 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1822   CreateIntIntIntIntToVoidPlusTempsLocations(
1823       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1824 }
1825 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1826   CreateIntIntIntIntToVoidPlusTempsLocations(
1827       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1828 }
1829 
1830 // Ordered puts need no extra code: ordered semantics only require an AnyStore barrier, which the
1831 // x86 memory model already provides.
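// Volatile puts, by contrast, do need a StoreLoad barrier after the store; codegen->MemoryFence()
// below emits it (an mfence or an equivalent locked operation).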
1832 static void GenUnsafePut(LocationSummary* locations,
1833                          DataType::Type type,
1834                          bool is_volatile,
1835                          CodeGeneratorX86* codegen) {
1836   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1837   Register base = locations->InAt(1).AsRegister<Register>();
1838   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1839   Location value_loc = locations->InAt(3);
1840 
1841   if (type == DataType::Type::kInt64) {
1842     Register value_lo = value_loc.AsRegisterPairLow<Register>();
1843     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1844     if (is_volatile) {
1845       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1846       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
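      // Assemble value_hi:value_lo in temp1 via punpckldq so that a single 8-byte movsd publishes
      // the whole 64-bit value at once.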
1847       __ movd(temp1, value_lo);
1848       __ movd(temp2, value_hi);
1849       __ punpckldq(temp1, temp2);
1850       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1851     } else {
1852       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1853       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1854     }
1855   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1856     Register temp = locations->GetTemp(0).AsRegister<Register>();
1857     __ movl(temp, value_loc.AsRegister<Register>());
1858     __ PoisonHeapReference(temp);
1859     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1860   } else {
1861     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1862   }
1863 
1864   if (is_volatile) {
1865     codegen->MemoryFence();
1866   }
1867 
1868   if (type == DataType::Type::kReference) {
1869     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1870     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1871                         locations->GetTemp(1).AsRegister<Register>(),
1872                         base,
1873                         value_loc.AsRegister<Register>(),
1874                         value_can_be_null);
1875   }
1876 }
1877 
1878 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1879   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1880 }
1881 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1882   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1883 }
1884 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1885   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1886 }
1887 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1888   GenUnsafePut(
1889       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1890 }
1891 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1892   GenUnsafePut(
1893       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1894 }
1895 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1896   GenUnsafePut(
1897       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1898 }
1899 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1900   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1901 }
1902 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1903   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1904 }
1905 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1906   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1907 }
1908 
1909 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1910                                        DataType::Type type,
1911                                        HInvoke* invoke) {
1912   bool can_call = kEmitCompilerReadBarrier &&
1913       kUseBakerReadBarrier &&
1914       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1915   LocationSummary* locations =
1916       new (allocator) LocationSummary(invoke,
1917                                       can_call
1918                                           ? LocationSummary::kCallOnSlowPath
1919                                           : LocationSummary::kNoCall,
1920                                       kIntrinsified);
1921   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1922   locations->SetInAt(1, Location::RequiresRegister());
1923   // Offset is a long, but in 32 bit mode, we only need the low word.
1924   // Can we update the invoke here to remove a TypeConvert to Long?
1925   locations->SetInAt(2, Location::RequiresRegister());
1926   // Expected value must be in EAX or EDX:EAX.
1927   // For long, new value must be in ECX:EBX.
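  // (CMPXCHG implicitly compares against EAX, or EDX:EAX for CMPXCHG8B, and writes the old value
  // back there; CMPXCHG8B takes its replacement value in ECX:EBX. This is also why the output is
  // forced to EAX below.)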
1928   if (type == DataType::Type::kInt64) {
1929     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1930     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1931   } else {
1932     locations->SetInAt(3, Location::RegisterLocation(EAX));
1933     locations->SetInAt(4, Location::RequiresRegister());
1934   }
1935 
1936   // Force a byte register for the output.
1937   locations->SetOut(Location::RegisterLocation(EAX));
1938   if (type == DataType::Type::kReference) {
1939     // Need temporary registers for card-marking, and possibly for
1940     // (Baker) read barrier.
1941     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1942     // Need a byte register for marking.
1943     locations->AddTemp(Location::RegisterLocation(ECX));
1944   }
1945 }
1946 
1947 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1948   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1949 }
1950 
1951 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1952   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1953 }
1954 
1955 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1956   // The only read barrier implementation supporting the
1957   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1958   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1959     return;
1960   }
1961 
1962   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
1963 }
1964 
1965 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
1966                                       CodeGeneratorX86* codegen,
1967                                       Location expected_value,
1968                                       Location new_value,
1969                                       Register base,
1970                                       Register offset,
1971                                       // Only necessary for floating point
1972                                       Register temp = Register::kNoRegister) {
1973   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1974 
1975   if (DataType::Kind(type) == DataType::Type::kInt32) {
1976     DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
1977   }
1978 
1979   // The address of the field within the holding object.
1980   Address field_addr(base, offset, TIMES_1, 0);
1981 
1982   switch (type) {
1983     case DataType::Type::kBool:
1984     case DataType::Type::kInt8:
1985       __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
1986       break;
1987     case DataType::Type::kInt16:
1988     case DataType::Type::kUint16:
1989       __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
1990       break;
1991     case DataType::Type::kInt32:
1992       __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
1993       break;
1994     case DataType::Type::kFloat32: {
1995       // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
1996       DCHECK_NE(temp, EAX);
1997       // EAX is both an input and an output for cmpxchg
1998       codegen->Move32(Location::RegisterLocation(EAX), expected_value);
1999       codegen->Move32(Location::RegisterLocation(temp), new_value);
2000       __ LockCmpxchgl(field_addr, temp);
2001       break;
2002     }
2003     case DataType::Type::kInt64:
2004       // Ensure the expected value is in EAX:EDX and that the new
2005       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2006       DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2007       DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2008       DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2009       DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2010       __ LockCmpxchg8b(field_addr);
2011       break;
2012     default:
2013       LOG(FATAL) << "Unexpected CAS type " << type;
2014   }
2015   // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2016   // don't need scheduling barriers at this time.
2017 }
2018 
2019 static void GenPrimitiveCAS(DataType::Type type,
2020                             CodeGeneratorX86* codegen,
2021                             Location expected_value,
2022                             Location new_value,
2023                             Register base,
2024                             Register offset,
2025                             Location out,
2026                             // Only necessary for floating point
2027                             Register temp = Register::kNoRegister,
2028                             bool is_cmpxchg = false) {
2029   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2030 
2031   if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2032     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2033   }
2034 
2035   GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2036 
2037   if (is_cmpxchg) {
2038     // Sign-extend, zero-extend or move the result if necessary
2039     switch (type) {
2040       case DataType::Type::kBool:
2041         __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2042         break;
2043       case DataType::Type::kInt8:
2044         __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2045         break;
2046       case DataType::Type::kInt16:
2047         __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2048         break;
2049       case DataType::Type::kUint16:
2050         __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2051         break;
2052       case DataType::Type::kFloat32:
2053         __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2054         break;
2055       default:
2056         // Nothing to do
2057         break;
2058     }
2059   } else {
2060     // Convert ZF into the Boolean result.
2061     __ setb(kZero, out.AsRegister<Register>());
2062     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
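    // ZF was set by LOCK CMPXCHG iff the exchange succeeded, so `out` becomes 1 on success and
    // 0 on failure.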
2063   }
2064 }
2065 
2066 static void GenReferenceCAS(HInvoke* invoke,
2067                             CodeGeneratorX86* codegen,
2068                             Location expected_value,
2069                             Location new_value,
2070                             Register base,
2071                             Register offset,
2072                             Register temp,
2073                             Register temp2,
2074                             bool is_cmpxchg = false) {
2075   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2076   LocationSummary* locations = invoke->GetLocations();
2077   Location out = locations->Out();
2078 
2079   // The address of the field within the holding object.
2080   Address field_addr(base, offset, TIMES_1, 0);
2081 
2082   Register value = new_value.AsRegister<Register>();
2083   Register expected = expected_value.AsRegister<Register>();
2084   DCHECK_EQ(expected, EAX);
2085   DCHECK_NE(temp, temp2);
2086 
2087   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2088     // Need to make sure the reference stored in the field is a to-space
2089     // one before attempting the CAS or the CAS could fail incorrectly.
2090     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2091         invoke,
2092         // This location's value is unused; it only serves as a temporary within the read barrier.
2093         Location::RegisterLocation(temp),
2094         base,
2095         field_addr,
2096         /* needs_null_check= */ false,
2097         /* always_update_field= */ true,
2098         &temp2);
2099   }
2100   bool base_equals_value = (base == value);
2101   if (kPoisonHeapReferences) {
2102     if (base_equals_value) {
2103       // If `base` and `value` are the same register location, move
2104       // `value` to a temporary register.  This way, poisoning
2105       // `value` won't invalidate `base`.
2106       value = temp;
2107       __ movl(value, base);
2108     }
2109 
2110     // Check that the register allocator did not assign the location
2111     // of `expected` (EAX) to `value` nor to `base`, so that heap
2112     // poisoning (when enabled) works as intended below.
2113     // - If `value` were equal to `expected`, both references would
2114     //   be poisoned twice, meaning they would not be poisoned at
2115     //   all, as heap poisoning uses address negation.
2116     // - If `base` were equal to `expected`, poisoning `expected`
2117     //   would invalidate `base`.
2118     DCHECK_NE(value, expected);
2119     DCHECK_NE(base, expected);
2120     __ PoisonHeapReference(expected);
2121     __ PoisonHeapReference(value);
2122   }
2123   __ LockCmpxchgl(field_addr, value);
2124 
2125   // LOCK CMPXCHG has full barrier semantics, and we don't need
2126   // scheduling barriers at this time.
2127 
2128   if (is_cmpxchg) {
2129     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2130     __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2131   } else {
2132     // Convert ZF into the Boolean result.
2133     __ setb(kZero, out.AsRegister<Register>());
2134     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2135   }
2136 
2137   // Mark card for object if the new value is stored.
2138   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2139   NearLabel skip_mark_gc_card;
2140   __ j(kNotZero, &skip_mark_gc_card);
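  // The branch above consumes ZF from the LOCK CMPXCHG: a failed CAS stored nothing, so the card
  // mark can be skipped.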
2141   codegen->MarkGCCard(temp, temp2, base, value, value_can_be_null);
2142   __ Bind(&skip_mark_gc_card);
2143 
2144   // If heap poisoning is enabled, we need to unpoison the values
2145   // that were poisoned earlier.
2146   if (kPoisonHeapReferences) {
2147     if (base_equals_value) {
2148       // `value` has been moved to a temporary register, no need to
2149       // unpoison it.
2150     } else {
2151       // Ensure `value` is different from `out`, so that unpoisoning
2152       // the former does not invalidate the latter.
2153       DCHECK_NE(value, out.AsRegister<Register>());
2154       __ UnpoisonHeapReference(value);
2155     }
2156   }
2157   // Do not unpoison the reference contained in register
2158   // `expected`, as it is the same as register `out` (EAX).
2159 }
2160 
2161 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2162   LocationSummary* locations = invoke->GetLocations();
2163 
2164   Register base = locations->InAt(1).AsRegister<Register>();
2165   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2166   Location expected_value = locations->InAt(3);
2167   Location new_value = locations->InAt(4);
2168   Location out = locations->Out();
2169   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2170 
2171   if (type == DataType::Type::kReference) {
2172     // The only read barrier implementation supporting the
2173     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2174     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2175 
2176     Register temp = locations->GetTemp(0).AsRegister<Register>();
2177     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2178     GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2179   } else {
2180     DCHECK(!DataType::IsFloatingPointType(type));
2181     GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2182   }
2183 }
2184 
2185 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2186   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2187 }
2188 
2189 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2190   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2191 }
2192 
2193 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2194   // The only read barrier implementation supporting the
2195   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2196   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2197 
2198   GenCAS(DataType::Type::kReference, invoke, codegen_);
2199 }
2200 
2201 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2202   LocationSummary* locations =
2203       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2204   locations->SetInAt(0, Location::RequiresRegister());
2205   locations->SetOut(Location::SameAsFirstInput());
2206   locations->AddTemp(Location::RequiresRegister());
2207 }
2208 
2209 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2210                      X86Assembler* assembler) {
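  // Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps each group of
  // bits selected by `mask` with the group `shift` positions above it.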
2211   Immediate imm_shift(shift);
2212   Immediate imm_mask(mask);
2213   __ movl(temp, reg);
2214   __ shrl(reg, imm_shift);
2215   __ andl(temp, imm_mask);
2216   __ andl(reg, imm_mask);
2217   __ shll(temp, imm_shift);
2218   __ orl(reg, temp);
2219 }
2220 
2221 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2222   X86Assembler* assembler = GetAssembler();
2223   LocationSummary* locations = invoke->GetLocations();
2224 
2225   Register reg = locations->InAt(0).AsRegister<Register>();
2226   Register temp = locations->GetTemp(0).AsRegister<Register>();
2227 
2228   /*
2229    * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
2230    * bit swapping to reverse the bits of x. Using bswap saves instructions compared to the
2231    * generic luni implementation, which needs 5 rounds of bit swapping.
2232    * x = bswap x
2233    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2234    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2235    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2236    */
2237   __ bswapl(reg);
2238   SwapBits(reg, temp, 1, 0x55555555, assembler);
2239   SwapBits(reg, temp, 2, 0x33333333, assembler);
2240   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2241 }
2242 
2243 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2244   LocationSummary* locations =
2245       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2246   locations->SetInAt(0, Location::RequiresRegister());
2247   locations->SetOut(Location::SameAsFirstInput());
2248   locations->AddTemp(Location::RequiresRegister());
2249 }
2250 
2251 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2252   X86Assembler* assembler = GetAssembler();
2253   LocationSummary* locations = invoke->GetLocations();
2254 
2255   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2256   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2257   Register temp = locations->GetTemp(0).AsRegister<Register>();
2258 
2259   // We want to swap high/low, then bswap each one, and then do the same
2260   // as a 32 bit reverse.
2261   // Exchange high and low.
2262   __ movl(temp, reg_low);
2263   __ movl(reg_low, reg_high);
2264   __ movl(reg_high, temp);
2265 
2266   // bit-reverse low
2267   __ bswapl(reg_low);
2268   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2269   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2270   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2271 
2272   // bit-reverse high
2273   __ bswapl(reg_high);
2274   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2275   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2276   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2277 }
2278 
2279 static void CreateBitCountLocations(
2280     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2281   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2282     // Do nothing if there is no popcnt support. This results in generating
2283     // a call for the intrinsic rather than direct code.
2284     return;
2285   }
2286   LocationSummary* locations =
2287       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2288   if (is_long) {
2289     locations->AddTemp(Location::RequiresRegister());
2290   }
2291   locations->SetInAt(0, Location::Any());
2292   locations->SetOut(Location::RequiresRegister());
2293 }
2294 
2295 static void GenBitCount(X86Assembler* assembler,
2296                         CodeGeneratorX86* codegen,
2297                         HInvoke* invoke, bool is_long) {
2298   LocationSummary* locations = invoke->GetLocations();
2299   Location src = locations->InAt(0);
2300   Register out = locations->Out().AsRegister<Register>();
2301 
2302   if (invoke->InputAt(0)->IsConstant()) {
2303     // Evaluate this at compile time.
2304     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2305     int32_t result = is_long
2306         ? POPCOUNT(static_cast<uint64_t>(value))
2307         : POPCOUNT(static_cast<uint32_t>(value));
2308     codegen->Load32BitValue(out, result);
2309     return;
2310   }
2311 
2312   // Handle the non-constant cases.
2313   if (!is_long) {
2314     if (src.IsRegister()) {
2315       __ popcntl(out, src.AsRegister<Register>());
2316     } else {
2317       DCHECK(src.IsStackSlot());
2318       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2319     }
2320   } else {
2321     // The 64-bit case needs to worry about two parts.
2322     Register temp = locations->GetTemp(0).AsRegister<Register>();
2323     if (src.IsRegisterPair()) {
2324       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2325       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2326     } else {
2327       DCHECK(src.IsDoubleStackSlot());
2328       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2329       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2330     }
2331     __ addl(out, temp);
2332   }
2333 }
2334 
2335 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2336   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2337 }
2338 
2339 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2340   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2341 }
2342 
2343 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2344   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2345 }
2346 
2347 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2348   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2349 }
2350 
2351 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2352   LocationSummary* locations =
2353       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2354   if (is_long) {
2355     locations->SetInAt(0, Location::RequiresRegister());
2356   } else {
2357     locations->SetInAt(0, Location::Any());
2358   }
2359   locations->SetOut(Location::RequiresRegister());
2360 }
2361 
2362 static void GenLeadingZeros(X86Assembler* assembler,
2363                             CodeGeneratorX86* codegen,
2364                             HInvoke* invoke, bool is_long) {
2365   LocationSummary* locations = invoke->GetLocations();
2366   Location src = locations->InAt(0);
2367   Register out = locations->Out().AsRegister<Register>();
2368 
2369   if (invoke->InputAt(0)->IsConstant()) {
2370     // Evaluate this at compile time.
2371     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2372     if (value == 0) {
2373       value = is_long ? 64 : 32;
2374     } else {
2375       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2376     }
2377     codegen->Load32BitValue(out, value);
2378     return;
2379   }
2380 
2381   // Handle the non-constant cases.
2382   if (!is_long) {
2383     if (src.IsRegister()) {
2384       __ bsrl(out, src.AsRegister<Register>());
2385     } else {
2386       DCHECK(src.IsStackSlot());
2387       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2388     }
2389 
2390     // BSR sets ZF if the input was zero, and the output is undefined.
2391     NearLabel all_zeroes, done;
2392     __ j(kEqual, &all_zeroes);
2393 
2394     // Correct the result from BSR to get the final CLZ result.
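    // For a non-zero 32-bit input, BSR yields the index of the highest set bit (0..31);
    // the leading-zero count is 31 - index, which equals index ^ 31 since index <= 31
    // (e.g. input 0x00008000: BSR gives 15, and 15 ^ 31 = 16 leading zeros).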
2395     __ xorl(out, Immediate(31));
2396     __ jmp(&done);
2397 
2398     // Fix the zero case with the expected result.
2399     __ Bind(&all_zeroes);
2400     __ movl(out, Immediate(32));
2401 
2402     __ Bind(&done);
2403     return;
2404   }
2405 
2406   // 64 bit case needs to worry about both parts of the register.
2407   DCHECK(src.IsRegisterPair());
2408   Register src_lo = src.AsRegisterPairLow<Register>();
2409   Register src_hi = src.AsRegisterPairHigh<Register>();
2410   NearLabel handle_low, done, all_zeroes;
2411 
2412   // Is the high word zero?
2413   __ testl(src_hi, src_hi);
2414   __ j(kEqual, &handle_low);
2415 
2416   // High word is not zero. We know that the BSR result is defined in this case.
2417   __ bsrl(out, src_hi);
2418 
2419   // Correct the result from BSR to get the final CLZ result.
2420   __ xorl(out, Immediate(31));
2421   __ jmp(&done);
2422 
2423   // High word was zero.  We have to compute the low word count and add 32.
2424   __ Bind(&handle_low);
2425   __ bsrl(out, src_lo);
2426   __ j(kEqual, &all_zeroes);
2427 
2428   // We had a valid result.  Use an XOR to both correct the result and add 32.
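  // With the high word zero, CLZ64 = 32 + (31 - index) = 63 - index, which equals
  // index ^ 63 since index <= 31.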
2429   __ xorl(out, Immediate(63));
2430   __ jmp(&done);
2431 
2432   // All zero case.
2433   __ Bind(&all_zeroes);
2434   __ movl(out, Immediate(64));
2435 
2436   __ Bind(&done);
2437 }
2438 
2439 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2440   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2441 }
2442 
2443 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2444   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2445 }
2446 
2447 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2448   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2449 }
2450 
2451 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2452   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2453 }
2454 
2455 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2456   LocationSummary* locations =
2457       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2458   if (is_long) {
2459     locations->SetInAt(0, Location::RequiresRegister());
2460   } else {
2461     locations->SetInAt(0, Location::Any());
2462   }
2463   locations->SetOut(Location::RequiresRegister());
2464 }
2465 
2466 static void GenTrailingZeros(X86Assembler* assembler,
2467                              CodeGeneratorX86* codegen,
2468                              HInvoke* invoke, bool is_long) {
2469   LocationSummary* locations = invoke->GetLocations();
2470   Location src = locations->InAt(0);
2471   Register out = locations->Out().AsRegister<Register>();
2472 
2473   if (invoke->InputAt(0)->IsConstant()) {
2474     // Evaluate this at compile time.
2475     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2476     if (value == 0) {
2477       value = is_long ? 64 : 32;
2478     } else {
2479       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2480     }
2481     codegen->Load32BitValue(out, value);
2482     return;
2483   }
2484 
2485   // Handle the non-constant cases.
2486   if (!is_long) {
2487     if (src.IsRegister()) {
2488       __ bsfl(out, src.AsRegister<Register>());
2489     } else {
2490       DCHECK(src.IsStackSlot());
2491       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2492     }
2493 
2494     // BSF sets ZF if the input was zero, and the output is undefined.
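    // For a non-zero input, BSF yields the index of the lowest set bit, which is exactly
    // the trailing-zero count (e.g. input 0x00000008 gives 3), so no correction is needed.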
2495     NearLabel done;
2496     __ j(kNotEqual, &done);
2497 
2498     // Fix the zero case with the expected result.
2499     __ movl(out, Immediate(32));
2500 
2501     __ Bind(&done);
2502     return;
2503   }
2504 
2505   // 64 bit case needs to worry about both parts of the register.
2506   DCHECK(src.IsRegisterPair());
2507   Register src_lo = src.AsRegisterPairLow<Register>();
2508   Register src_hi = src.AsRegisterPairHigh<Register>();
2509   NearLabel done, all_zeroes;
2510 
2511   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2512   __ bsfl(out, src_lo);
2513   __ j(kNotEqual, &done);
2514 
2515   // Low word was zero.  We have to compute the high word count and add 32.
2516   __ bsfl(out, src_hi);
2517   __ j(kEqual, &all_zeroes);
2518 
2519   // We had a valid result.  Add 32 to account for the low word being zero.
2520   __ addl(out, Immediate(32));
2521   __ jmp(&done);
2522 
2523   // All zero case.
2524   __ Bind(&all_zeroes);
2525   __ movl(out, Immediate(64));
2526 
2527   __ Bind(&done);
2528 }
2529 
2530 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2531   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2532 }
2533 
2534 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2535   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2536 }
2537 
2538 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2539   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2540 }
2541 
2542 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2543   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2544 }
2545 
2546 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2547   return instruction->InputAt(input0) == instruction->InputAt(input1);
2548 }
2549 
2550 // Compute base address for the System.arraycopy intrinsic in `base`.
2551 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2552                                           DataType::Type type,
2553                                           const Register& array,
2554                                           const Location& pos,
2555                                           const Register& base) {
2556   // This routine is currently only used by the SystemArrayCopy intrinsic, so only
2557   // DataType::Type::kReference is supported as `type`; extending it to other element
2558   // types would also allow implementing the SystemArrayCopyChar intrinsic.
2559   DCHECK_EQ(type, DataType::Type::kReference);
2560   const int32_t element_size = DataType::Size(type);
2561   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2562   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
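  // The computed base is `array + data_offset + pos * element_size`. With a constant `pos`
  // the scaled index folds into the displacement; otherwise leal applies the scale factor
  // to the position register directly.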
2563 
2564   if (pos.IsConstant()) {
2565     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2566     __ leal(base, Address(array, element_size * constant + data_offset));
2567   } else {
2568     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2569   }
2570 }
2571 
2572 // Compute end source address for the System.arraycopy intrinsic in `end`.
2573 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2574                                          DataType::Type type,
2575                                          const Location& copy_length,
2576                                          const Register& base,
2577                                          const Register& end) {
2578   // This routine is currently only used by the SystemArrayCopy intrinsic, so only
2579   // DataType::Type::kReference is supported as `type`; extending it to other element
2580   // types would also allow implementing the SystemArrayCopyChar intrinsic.
2581   DCHECK_EQ(type, DataType::Type::kReference);
2582   const int32_t element_size = DataType::Size(type);
2583   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2584 
2585   if (copy_length.IsConstant()) {
2586     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2587     __ leal(end, Address(base, element_size * constant));
2588   } else {
2589     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2590   }
2591 }
2592 
2593 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2594   // The only read barrier implementation supporting the
2595   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2596   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2597     return;
2598   }
2599 
2600   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2601   if (invoke->GetLocations() != nullptr) {
2602     // Need a byte register for marking.
2603     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2604 
2605     static constexpr size_t kSrc = 0;
2606     static constexpr size_t kSrcPos = 1;
2607     static constexpr size_t kDest = 2;
2608     static constexpr size_t kDestPos = 3;
2609     static constexpr size_t kLength = 4;
2610 
2611     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2612         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2613         !invoke->InputAt(kLength)->IsIntConstant()) {
2614       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2615           !IsSameInput(invoke, kSrcPos, kLength) &&
2616           !IsSameInput(invoke, kDestPos, kLength) &&
2617           !IsSameInput(invoke, kSrc, kDest)) {
2618         // Not enough registers, make the length also take a stack slot.
2619         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2620       }
2621     }
2622   }
2623 }
2624 
2625 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2626   // The only read barrier implementation supporting the
2627   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2628   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2629 
2630   X86Assembler* assembler = GetAssembler();
2631   LocationSummary* locations = invoke->GetLocations();
2632 
2633   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2634   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2635   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2636   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2637   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2638 
2639   Register src = locations->InAt(0).AsRegister<Register>();
2640   Location src_pos = locations->InAt(1);
2641   Register dest = locations->InAt(2).AsRegister<Register>();
2642   Location dest_pos = locations->InAt(3);
2643   Location length_arg = locations->InAt(4);
2644   Location length = length_arg;
2645   Location temp1_loc = locations->GetTemp(0);
2646   Register temp1 = temp1_loc.AsRegister<Register>();
2647   Location temp2_loc = locations->GetTemp(1);
2648   Register temp2 = temp2_loc.AsRegister<Register>();
2649 
2650   SlowPathCode* intrinsic_slow_path =
2651       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2652   codegen_->AddSlowPath(intrinsic_slow_path);
2653 
2654   NearLabel conditions_on_positions_validated;
2655   SystemArrayCopyOptimizations optimizations(invoke);
2656 
2657   // If source and destination are the same, we go to the slow path when dest_pos > src_pos,
2658   // as the forward copy loop below would otherwise overwrite source elements not yet copied.
2659   if (src_pos.IsConstant()) {
2660     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2661     if (dest_pos.IsConstant()) {
2662       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2663       if (optimizations.GetDestinationIsSource()) {
2664         // Checked when building locations.
2665         DCHECK_GE(src_pos_constant, dest_pos_constant);
2666       } else if (src_pos_constant < dest_pos_constant) {
2667         __ cmpl(src, dest);
2668         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2669       }
2670     } else {
2671       if (!optimizations.GetDestinationIsSource()) {
2672         __ cmpl(src, dest);
2673         __ j(kNotEqual, &conditions_on_positions_validated);
2674       }
2675       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2676       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2677     }
2678   } else {
2679     if (!optimizations.GetDestinationIsSource()) {
2680       __ cmpl(src, dest);
2681       __ j(kNotEqual, &conditions_on_positions_validated);
2682     }
2683     if (dest_pos.IsConstant()) {
2684       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2685       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2686       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2687     } else {
2688       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2689       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2690     }
2691   }
2692 
2693   __ Bind(&conditions_on_positions_validated);
2694 
2695   if (!optimizations.GetSourceIsNotNull()) {
2696     // Bail out if the source is null.
2697     __ testl(src, src);
2698     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2699   }
2700 
2701   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2702     // Bail out if the destination is null.
2703     __ testl(dest, dest);
2704     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2705   }
2706 
2707   Location temp3_loc = locations->GetTemp(2);
2708   Register temp3 = temp3_loc.AsRegister<Register>();
2709   if (length.IsStackSlot()) {
2710     __ movl(temp3, Address(ESP, length.GetStackIndex()));
2711     length = Location::RegisterLocation(temp3);
2712   }
2713 
2714   // If the length is negative, bail out.
2715   // We have already checked in the LocationsBuilder for the constant case.
2716   if (!length.IsConstant() &&
2717       !optimizations.GetCountIsSourceLength() &&
2718       !optimizations.GetCountIsDestinationLength()) {
2719     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2720     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2721   }
2722 
2723   // Validity checks: source.
2724   CheckPosition(assembler,
2725                 src_pos,
2726                 src,
2727                 length,
2728                 intrinsic_slow_path,
2729                 temp1,
2730                 optimizations.GetCountIsSourceLength());
2731 
2732   // Validity checks: dest.
2733   CheckPosition(assembler,
2734                 dest_pos,
2735                 dest,
2736                 length,
2737                 intrinsic_slow_path,
2738                 temp1,
2739                 optimizations.GetCountIsDestinationLength());
2740 
2741   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2742     // Check whether all elements of the source array are assignable to the component
2743     // type of the destination array. We do two checks: the classes are the same,
2744     // or the destination is Object[]. If none of these checks succeed, we go to the
2745     // slow path.
2746 
2747     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2748       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2749         // /* HeapReference<Class> */ temp1 = src->klass_
2750         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2751             invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2752         // Bail out if the source is not a non primitive array.
2753         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2754         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2755             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2756         __ testl(temp1, temp1);
2757         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2758         // If heap poisoning is enabled, `temp1` has been unpoisoned
2759         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2760       } else {
2761         // /* HeapReference<Class> */ temp1 = src->klass_
2762         __ movl(temp1, Address(src, class_offset));
2763         __ MaybeUnpoisonHeapReference(temp1);
2764         // Bail out if the source is not a non primitive array.
2765         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2766         __ movl(temp1, Address(temp1, component_offset));
2767         __ testl(temp1, temp1);
2768         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2769         __ MaybeUnpoisonHeapReference(temp1);
2770       }
2771       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2772       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2773     }
2774 
2775     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2776       if (length.Equals(Location::RegisterLocation(temp3))) {
2777         // When Baker read barriers are enabled, register `temp3`,
2778         // which in the present case contains the `length` parameter,
2779         // will be overwritten below.  Make the `length` location
2780         // reference the original stack location; it will be moved
2781         // back to `temp3` later if necessary.
2782         DCHECK(length_arg.IsStackSlot());
2783         length = length_arg;
2784       }
2785 
2786       // /* HeapReference<Class> */ temp1 = dest->klass_
2787       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2788           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2789 
2790       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2791         // Bail out if the destination is not a non primitive array.
2792         //
2793         // Register `temp1` is not trashed by the read barrier emitted
2794         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2795         // method produces a call to a ReadBarrierMarkRegX entry point,
2796         // which saves all potentially live registers, including
2797         // temporaries such as `temp1`.
2798         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2799         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2800             invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2801         __ testl(temp2, temp2);
2802         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2803         // If heap poisoning is enabled, `temp2` has been unpoisoned
2804         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2805         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2806         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2807       }
2808 
2809       // For the same reason given earlier, `temp1` is not trashed by the
2810       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2811       // /* HeapReference<Class> */ temp2 = src->klass_
2812       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2813           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2814       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2815       __ cmpl(temp1, temp2);
2816 
2817       if (optimizations.GetDestinationIsTypedObjectArray()) {
2818         NearLabel do_copy;
2819         __ j(kEqual, &do_copy);
2820         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2821         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2822             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2823         // We do not need to emit a read barrier for the following
2824         // heap reference load, as `temp1` is only used in a
2825         // comparison with null below, and this reference is not
2826         // kept afterwards.
2827         __ cmpl(Address(temp1, super_offset), Immediate(0));
2828         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2829         __ Bind(&do_copy);
2830       } else {
2831         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2832       }
2833     } else {
2834       // Non read barrier code.
2835 
2836       // /* HeapReference<Class> */ temp1 = dest->klass_
2837       __ movl(temp1, Address(dest, class_offset));
2838       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2839         __ MaybeUnpoisonHeapReference(temp1);
2840         // Bail out if the destination is not a non primitive array.
2841         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2842         __ movl(temp2, Address(temp1, component_offset));
2843         __ testl(temp2, temp2);
2844         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2845         __ MaybeUnpoisonHeapReference(temp2);
2846         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2847         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2848         // Re-poison the heap reference to make the compare instruction below
2849         // compare two poisoned references.
2850         __ PoisonHeapReference(temp1);
2851       }
2852 
2853       // Note: if heap poisoning is on, we are comparing two poisoned references here.
2854       __ cmpl(temp1, Address(src, class_offset));
2855 
2856       if (optimizations.GetDestinationIsTypedObjectArray()) {
2857         NearLabel do_copy;
2858         __ j(kEqual, &do_copy);
2859         __ MaybeUnpoisonHeapReference(temp1);
2860         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2861         __ movl(temp1, Address(temp1, component_offset));
2862         __ MaybeUnpoisonHeapReference(temp1);
2863         __ cmpl(Address(temp1, super_offset), Immediate(0));
2864         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2865         __ Bind(&do_copy);
2866       } else {
2867         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2868       }
2869     }
2870   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2871     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2872     // Bail out if the source is not a non primitive array.
2873     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2874       // /* HeapReference<Class> */ temp1 = src->klass_
2875       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2876           invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2877       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2878       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2879           invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2880       __ testl(temp1, temp1);
2881       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2882       // If heap poisoning is enabled, `temp1` has been unpoisoned
2883       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2884     } else {
2885       // /* HeapReference<Class> */ temp1 = src->klass_
2886       __ movl(temp1, Address(src, class_offset));
2887       __ MaybeUnpoisonHeapReference(temp1);
2888       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2889       __ movl(temp1, Address(temp1, component_offset));
2890       __ testl(temp1, temp1);
2891       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2892       __ MaybeUnpoisonHeapReference(temp1);
2893     }
2894     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2895     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2896   }
2897 
2898   const DataType::Type type = DataType::Type::kReference;
2899   const int32_t element_size = DataType::Size(type);
2900 
2901   // Compute the base source address in `temp1`.
2902   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2903 
2904   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2905     // If it is needed (in the case of the fast-path loop), the base
2906     // destination address is computed later, as `temp2` is used for
2907     // intermediate computations.
2908 
2909     // Compute the end source address in `temp3`.
2910     if (length.IsStackSlot()) {
2911       // Location `length` is again pointing at a stack slot, as
2912       // register `temp3` (which contained the length parameter
2913       // earlier) has been overwritten; restore it now.
2914       DCHECK(length.Equals(length_arg));
2915       __ movl(temp3, Address(ESP, length.GetStackIndex()));
2916       length = Location::RegisterLocation(temp3);
2917     }
2918     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2919 
2920     // SystemArrayCopy implementation for Baker read barriers (see
2921     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
2922     //
2923     //   if (src_ptr != end_ptr) {
2924     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2925     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2926     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
2927     //     if (is_gray) {
2928     //       // Slow-path copy.
2929     //       for (size_t i = 0; i != length; ++i) {
2930     //         dest_array[dest_pos + i] =
2931     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
2932     //       }
2933     //     } else {
2934     //       // Fast-path copy.
2935     //       do {
2936     //         *dest_ptr++ = *src_ptr++;
2937     //       } while (src_ptr != end_ptr)
2938     //     }
2939     //   }
2940 
2941     NearLabel loop, done;
2942 
2943     // Don't enter copy loop if `length == 0`.
2944     __ cmpl(temp1, temp3);
2945     __ j(kEqual, &done);
2946 
2947     // Given the numeric representation, it's enough to check the low bit of the rb_state.
2948     static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2949     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2950     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
2951     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
2952     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
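    // The read barrier state is a single bit of the 32-bit lock word. Instead of loading the
    // whole word, we test only the byte that holds that bit (byte index = shift / 8 on this
    // little-endian target) against the bit's mask within that byte.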
2953 
2954     // if (rb_state == ReadBarrier::GrayState())
2955     //   goto slow_path;
2956     // At this point, just do the "if" and make sure that flags are preserved until the branch.
2957     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
2958 
2959     // Load fence to prevent load-load reordering.
2960     // Note that this is a no-op, thanks to the x86 memory model.
2961     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2962 
2963     // Slow path used to copy array when `src` is gray.
2964     SlowPathCode* read_barrier_slow_path =
2965         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
2966     codegen_->AddSlowPath(read_barrier_slow_path);
2967 
2968     // We have done the "if" of the gray bit check above, now branch based on the flags.
2969     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
2970 
2971     // Fast-path copy.
2972     // Compute the base destination address in `temp2`.
2973     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2974     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2975     // poison/unpoison.
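    // The pushl/popl pair performs a 32-bit memory-to-memory move without needing another
    // scratch register; the AdjustCFAOffset calls keep the CFI in sync with the transient
    // stack pointer change.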
2976     __ Bind(&loop);
2977     __ pushl(Address(temp1, 0));
2978     __ cfi().AdjustCFAOffset(4);
2979     __ popl(Address(temp2, 0));
2980     __ cfi().AdjustCFAOffset(-4);
2981     __ addl(temp1, Immediate(element_size));
2982     __ addl(temp2, Immediate(element_size));
2983     __ cmpl(temp1, temp3);
2984     __ j(kNotEqual, &loop);
2985 
2986     __ Bind(read_barrier_slow_path->GetExitLabel());
2987     __ Bind(&done);
2988   } else {
2989     // Non read barrier code.
2990     // Compute the base destination address in `temp2`.
2991     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2992     // Compute the end source address in `temp3`.
2993     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2994     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2995     // poison/unpoison.
2996     NearLabel loop, done;
2997     __ cmpl(temp1, temp3);
2998     __ j(kEqual, &done);
2999     __ Bind(&loop);
3000     __ pushl(Address(temp1, 0));
3001     __ cfi().AdjustCFAOffset(4);
3002     __ popl(Address(temp2, 0));
3003     __ cfi().AdjustCFAOffset(-4);
3004     __ addl(temp1, Immediate(element_size));
3005     __ addl(temp2, Immediate(element_size));
3006     __ cmpl(temp1, temp3);
3007     __ j(kNotEqual, &loop);
3008     __ Bind(&done);
3009   }
3010 
3011   // We only need one card marking on the destination array.
3012   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
3013 
3014   __ Bind(intrinsic_slow_path->GetExitLabel());
3015 }
3016 
3017 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3018   LocationSummary* locations = invoke->GetLocations();
3019   if (locations != nullptr) {
3020     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3021     // Note: The base method address is not present yet when this is called from the
3022     // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3023     if (invoke_static_or_direct->HasSpecialInput()) {
3024       DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3025                  ->IsX86ComputeBaseMethodAddress());
3026       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3027                          Location::RequiresRegister());
3028     }
3029   }
3030 }
3031 
3032 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3033   DCHECK(invoke->IsInvokeStaticOrDirect());
3034   InvokeRuntimeCallingConvention calling_convention;
3035   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3036       invoke,
3037       codegen_,
3038       Location::RegisterLocation(EAX),
3039       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3040   RequestBaseMethodAddressInRegister(invoke);
3041 }
3042 
3043 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3044   DCHECK(invoke->IsInvokeStaticOrDirect());
3045   IntrinsicVisitor::IntegerValueOfInfo info =
3046       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3047   LocationSummary* locations = invoke->GetLocations();
3048   X86Assembler* assembler = GetAssembler();
3049 
3050   Register out = locations->Out().AsRegister<Register>();
3051   auto allocate_instance = [&]() {
3052     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3053     codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3054     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3055     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3056   };
3057   if (invoke->InputAt(0)->IsConstant()) {
3058     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3059     if (static_cast<uint32_t>(value - info.low) < info.length) {
3060       // Just embed the j.l.Integer in the code.
3061       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3062       codegen_->LoadBootImageAddress(
3063           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3064     } else {
3065       DCHECK(locations->CanCall());
3066       // Allocate and initialize a new j.l.Integer.
3067       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3068       // JIT object table.
3069       allocate_instance();
3070       __ movl(Address(out, info.value_offset), Immediate(value));
3071     }
3072   } else {
3073     DCHECK(locations->CanCall());
3074     Register in = locations->InAt(0).AsRegister<Register>();
3075     // Check bounds of our cache.
3076     __ leal(out, Address(in, -info.low));
3077     __ cmpl(out, Immediate(info.length));
3078     NearLabel allocate, done;
3079     __ j(kAboveEqual, &allocate);
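    // The leal/cmpl pair above folds the two-sided range check
    // `info.low <= value < info.low + info.length` into a single unsigned comparison:
    // values below info.low wrap around to large unsigned numbers and also take the
    // kAboveEqual branch.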
3080     // If the value is within the bounds, load the j.l.Integer directly from the array.
3081     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3082     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3083                   "Check heap reference size.");
3084     if (codegen_->GetCompilerOptions().IsBootImage()) {
3085       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3086       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3087       HX86ComputeBaseMethodAddress* method_address =
3088           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3089       DCHECK(method_address != nullptr);
3090       Register method_address_reg =
3091           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3092       __ movl(out,
3093               Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3094       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3095     } else {
3096       // Note: We're about to clobber the index in `out`, so we need to use `in` and
3097       // adjust the offset accordingly.
3098       uint32_t mid_array_boot_image_offset =
3099               info.array_data_boot_image_reference - info.low * kElementSize;
3100       codegen_->LoadBootImageAddress(
3101           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3102       DCHECK_NE(out, in);
3103       __ movl(out, Address(out, in, TIMES_4, 0));
3104     }
3105     __ MaybeUnpoisonHeapReference(out);
3106     __ jmp(&done);
3107     __ Bind(&allocate);
3108     // Otherwise allocate and initialize a new j.l.Integer.
3109     allocate_instance();
3110     __ movl(Address(out, info.value_offset), in);
3111     __ Bind(&done);
3112   }
3113 }
3114 
3115 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3116   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3117   RequestBaseMethodAddressInRegister(invoke);
3118 }
3119 
3120 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3121   X86Assembler* assembler = GetAssembler();
3122   LocationSummary* locations = invoke->GetLocations();
3123 
3124   Location obj = locations->InAt(0);
3125   Location out = locations->Out();
3126 
3127   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3128   codegen_->AddSlowPath(slow_path);
3129 
3130   if (kEmitCompilerReadBarrier) {
3131     // Check self->GetWeakRefAccessEnabled().
3132     ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3133     __ fs()->cmpl(Address::Absolute(offset), Immediate(0));
3134     __ j(kEqual, slow_path->GetEntryLabel());
3135   }
3136 
3137   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3138   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3139                                         invoke->AsInvokeStaticOrDirect());
3140 
3141   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3142   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3143   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3144   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3145             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3146   __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3147           Immediate(0));
3148   __ j(kNotEqual, slow_path->GetEntryLabel());
3149 
3150   // Load the value from the field.
3151   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3152   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3153     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3154                                                     out,
3155                                                     obj.AsRegister<Register>(),
3156                                                     referent_offset,
3157                                                     /*needs_null_check=*/ true);
3158     // Note that the fence is a no-op, thanks to the x86 memory model.
3159     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3160   } else {
3161     __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3162     codegen_->MaybeRecordImplicitNullCheck(invoke);
3163     // Note that the fence is a no-op, thanks to the x86 memory model.
3164     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3165     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3166   }
3167   __ Bind(slow_path->GetExitLabel());
3168 }
3169 
3170 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3171   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3172 }
3173 
3174 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3175   X86Assembler* assembler = GetAssembler();
3176   LocationSummary* locations = invoke->GetLocations();
3177 
3178   Register obj = locations->InAt(0).AsRegister<Register>();
3179   Register other = locations->InAt(1).AsRegister<Register>();
3180   Register out = locations->Out().AsRegister<Register>();
3181 
3182   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3183   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3184 
3185   __ movl(out, Address(obj, referent_offset));
3186   codegen_->MaybeRecordImplicitNullCheck(invoke);
3187   __ MaybeUnpoisonHeapReference(out);
3188   // Note that the fence is a no-op, thanks to the x86 memory model.
3189   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3190 
3191   NearLabel end, return_true, return_false;
3192   __ cmpl(out, other);
3193 
3194   if (kEmitCompilerReadBarrier) {
3195     DCHECK(kUseBakerReadBarrier);
3196 
3197     __ j(kEqual, &return_true);
3198 
3199     // Check if the loaded reference is null.
3200     __ testl(out, out);
3201     __ j(kZero, &return_false);
3202 
3203     // For correct memory visibility, we need a barrier before loading the lock word,
3204     // but the barrier already emitted for the volatile load above is sufficient.
3205 
3206     // Load the lockword and check if it is a forwarding address.
3207     static_assert(LockWord::kStateShift == 30u);
3208     static_assert(LockWord::kStateForwardingAddress == 3u);
3209     __ movl(out, Address(out, monitor_offset));
3210     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3211     __ j(kBelow, &return_false);
3212 
3213     // Extract the forwarding address and compare with `other`.
3214     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
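    // In the forwarding-address state the lock word holds the to-space address shifted right
    // by kForwardingAddressShift, with the two top (state) bits set; shifting left by the same
    // amount drops the state bits and recovers the address to compare against `other`.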
3215     __ cmpl(out, other);
3216   }
3217 
3218   __ j(kNotEqual, &return_false);
3219 
3220   // Return true and exit the function.
3221   __ Bind(&return_true);
3222   __ movl(out, Immediate(1));
3223   __ jmp(&end);
3224 
3225   // Return false and exit the function.
3226   __ Bind(&return_false);
3227   __ xorl(out, out);
3228   __ Bind(&end);
3229 }
3230 
3231 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3232   LocationSummary* locations =
3233       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3234   locations->SetOut(Location::RequiresRegister());
3235 }
3236 
3237 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3238   X86Assembler* assembler = GetAssembler();
3239   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3240   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
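  // On x86 the current Thread is reachable through the FS segment register, so the
  // interrupted flag is read, and cleared below, with fs-relative absolute addressing.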
3241   NearLabel done;
3242   __ fs()->movl(out, address);
3243   __ testl(out, out);
3244   __ j(kEqual, &done);
3245   __ fs()->movl(address, Immediate(0));
3246   codegen_->MemoryFence();
3247   __ Bind(&done);
3248 }
3249 
3250 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3251   LocationSummary* locations =
3252       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3253   locations->SetInAt(0, Location::Any());
3254 }
3255 
3256 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3257 
3258 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3259   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3260                                                                 LocationSummary::kCallOnSlowPath,
3261                                                                 kIntrinsified);
3262   locations->SetInAt(0, Location::RegisterLocation(EAX));
3263   locations->SetInAt(1, Location::RequiresRegister());
3264   locations->SetOut(Location::SameAsFirstInput());
3265   // Intel uses edx:eax as the dividend.
3266   locations->AddTemp(Location::RegisterLocation(EDX));
3267 }
3268 
3269 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3270   X86Assembler* assembler = GetAssembler();
3271   LocationSummary* locations = invoke->GetLocations();
3272   Location out = locations->Out();
3273   Location first = locations->InAt(0);
3274   Location second = locations->InAt(1);
3275   Register edx = locations->GetTemp(0).AsRegister<Register>();
3276   Register second_reg = second.AsRegister<Register>();
3277 
3278   DCHECK_EQ(EAX, first.AsRegister<Register>());
3279   DCHECK_EQ(EAX, out.AsRegister<Register>());
3280   DCHECK_EQ(EDX, edx);
3281 
3282   // Check if divisor is zero, bail to managed implementation to handle.
3283   __ testl(second_reg, second_reg);
3284   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3285   codegen_->AddSlowPath(slow_path);
3286   __ j(kEqual, slow_path->GetEntryLabel());
3287 
3288   __ xorl(edx, edx);
3289   __ divl(second_reg);
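  // Unsigned divide of edx:eax by the divisor; with edx zeroed above, the dividend is just
  // the 32-bit value in eax. The quotient lands in eax (the declared output) and the unused
  // remainder in edx.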
3290 
3291   __ Bind(slow_path->GetExitLabel());
3292 }
3293 
3294 static bool IsValidFieldVarHandleExpected(HInvoke* invoke) {
3295   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3296   if (expected_coordinates_count > 1u) {
3297     // Only static and instance fields VarHandle are supported now.
3298     return false;
3299   }
3300 
3301   if (expected_coordinates_count == 1u &&
3302       invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
3303       // For instance fields, the source object must be a reference.
3304     return false;
3305   }
3306 
3307   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3308   DataType::Type return_type = invoke->GetType();
3309   mirror::VarHandle::AccessModeTemplate access_mode_template =
3310       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
3311   switch (access_mode_template) {
3312     case mirror::VarHandle::AccessModeTemplate::kGet:
3313       // The return type should be the same as varType, so it shouldn't be void.
3314       if (return_type == DataType::Type::kVoid) {
3315         return false;
3316       }
3317       break;
3318     case mirror::VarHandle::AccessModeTemplate::kSet:
3319       if (return_type != DataType::Type::kVoid) {
3320         return false;
3321       }
3322       break;
3323     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
3324       if (return_type != DataType::Type::kBool) {
3325         return false;
3326       }
3327       uint32_t expected_value_index = number_of_arguments - 2;
3328       uint32_t new_value_index = number_of_arguments - 1;
3329       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
3330       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
3331 
3332       if (expected_value_type != new_value_type) {
3333         return false;
3334       }
3335       break;
3336     }
3337     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3338       DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
3339       if (IsVarHandleGetAndAdd(invoke) &&
3340           (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
3341         // We should only add numerical types.
3342         return false;
3343       } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
3344         // We can only apply operators to bitwise integral types.
3345         // Note that bitwise VarHandle operations accept a non-integral boolean type and
3346         // perform the appropriate logical operation. However, the result is the same as
3347         // using the bitwise operation on our boolean representation and this fits well
3348         // with DataType::IsIntegralType() treating the compiler type kBool as integral.
3349         return false;
3350       }
3351       if (value_type != return_type) {
3352         return false;
3353       }
3354       break;
3355     }
3356     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3357       uint32_t expected_value_index = number_of_arguments - 2;
3358       uint32_t new_value_index = number_of_arguments - 1;
3359       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
3360       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
3361 
3362       if (expected_value_type != new_value_type || return_type != expected_value_type) {
3363         return false;
3364       }
3365       break;
3366     }
3367   }
3368 
3369   return true;
3370 }
3371 
3372 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3373                                              mirror::VarHandle::AccessMode access_mode,
3374                                              SlowPathCode* slow_path,
3375                                              X86Assembler* assembler) {
3376   const uint32_t access_modes_bitmask_offset =
3377       mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3378   const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3379 
3380   // If the access mode is not supported, bail to runtime implementation to handle
3381   __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3382   __ j(kZero, slow_path->GetEntryLabel());
3383 }
3384 
3385 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3386                                               SlowPathCode* slow_path,
3387                                               X86Assembler* assembler) {
3388   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3389 
3390   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3391   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3392   __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3393   __ j(kNotEqual, slow_path->GetEntryLabel());
3394 }
3395 
3396 static void GenerateSubTypeObjectCheck(Register object,
3397                                        Register temp,
3398                                        Address type_address,
3399                                        SlowPathCode* slow_path,
3400                                        X86Assembler* assembler,
3401                                        bool object_can_be_null = true) {
3402   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3403   const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3404   NearLabel check_type_compatibility, type_matched;
3405 
3406   // If the object is null, there is no need to check the type
3407   if (object_can_be_null) {
3408     __ testl(object, object);
3409     __ j(kZero, &type_matched);
3410   }
3411 
3412   // Do not unpoison for in-memory comparison.
3413   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3414   __ movl(temp, Address(object, class_offset));
3415   __ Bind(&check_type_compatibility);
3416   __ cmpl(temp, type_address);
3417   __ j(kEqual, &type_matched);
3418   // Load the super class.
3419   __ MaybeUnpoisonHeapReference(temp);
3420   __ movl(temp, Address(temp, super_class_offset));
3421   // If the super class is null, we reached the root of the hierarchy without a match.
3422   // We let the slow path handle uncovered cases (e.g. interfaces).
3423   __ testl(temp, temp);
3424   __ j(kEqual, slow_path->GetEntryLabel());
3425   __ jmp(&check_type_compatibility);
3426   __ Bind(&type_matched);
3427 }
3428 
3429 static void GenerateVarHandleInstanceFieldObjectCheck(Register varhandle_object,
3430                                                       Register object,
3431                                                       Register temp,
3432                                                       SlowPathCode* slow_path,
3433                                                       X86Assembler* assembler) {
3434   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3435   const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3436 
3437   // Check that the VarHandle references an instance field by checking that
3438   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3439   // type compatibility check with the source object's type, which will fail for null.
3440   __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3441   __ j(kNotEqual, slow_path->GetEntryLabel());
3442 
3443   // Check if the object is null
3444   __ testl(object, object);
3445   __ j(kZero, slow_path->GetEntryLabel());
3446 
3447   // Check the object's class against coordinateType0.
3448   GenerateSubTypeObjectCheck(object,
3449                              temp,
3450                              Address(varhandle_object, coordtype0_offset),
3451                              slow_path,
3452                              assembler,
3453                              /* object_can_be_null= */ false);
3454 }
3455 
3456 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3457                                               Register temp,
3458                                               DataType::Type type,
3459                                               SlowPathCode* slow_path,
3460                                               X86Assembler* assembler) {
3461   const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3462   const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3463   const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3464 
3465   // We do not need a read barrier when loading the varType reference here, as it is only
3466   // used to read a constant field (the primitive type) through it and is not kept afterwards.
3467   __ movl(temp, Address(varhandle_object, var_type_offset));
3468   __ MaybeUnpoisonHeapReference(temp);
3469   __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3470   __ j(kNotEqual, slow_path->GetEntryLabel());
3471 }
3472 
3473 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3474                                           Register temp,
3475                                           SlowPathCode* slow_path,
3476                                           X86Assembler* assembler) {
3477   LocationSummary* locations = invoke->GetLocations();
3478   Register vh_object = locations->InAt(0).AsRegister<Register>();
3479   mirror::VarHandle::AccessMode access_mode =
3480       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3481 
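  // Check that the VarHandle supports the access mode implied by the intrinsic; if it does not,
  // jump to the slow path.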
3482   GenerateVarHandleAccessModeCheck(vh_object,
3483                                    access_mode,
3484                                    slow_path,
3485                                    assembler);
3486 
3487   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3488   switch (expected_coordinates_count) {
3489     case 0u:
3490       GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3491       break;
3492     case 1u: {
3493       Register object = locations->InAt(1).AsRegister<Register>();
3494       GenerateVarHandleInstanceFieldObjectCheck(vh_object, object, temp, slow_path, assembler);
3495       break;
3496     }
3497     default:
3498       // Unimplemented
3499       UNREACHABLE();
3500   }
3501 
3502   // Check the return type and varType parameters.
3503   mirror::VarHandle::AccessModeTemplate access_mode_template =
3504       mirror::VarHandle::GetAccessModeTemplate(access_mode);
3505   DataType::Type type = invoke->GetType();
3506 
3507   switch (access_mode_template) {
3508     case mirror::VarHandle::AccessModeTemplate::kGet:
3509       // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3510       // are also checked later by a HCheckCast node as an additional check.
3511       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3512       break;
3513     case mirror::VarHandle::AccessModeTemplate::kSet:
3514     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3515       uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3516       DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3517 
3518       // Check the varType.primitiveType against the type of the value we're trying to set.
3519       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3520       if (value_type == DataType::Type::kReference) {
3521         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3522 
3523         // If the value type is a reference, check it against the varType.
3524         GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3525                                    temp,
3526                                    Address(vh_object, var_type_offset),
3527                                    slow_path,
3528                                    assembler);
3529       }
3530       break;
3531     }
3532     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3533     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3534       uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3535       uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3536       DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3537       DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3538 
3539       // Check the varType.primitiveType against the type of the expected value.
3540       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3541       if (value_type == DataType::Type::kReference) {
3542         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3543 
3544         // If the value type is a reference, check both the expected and the new value against
3545         // the varType.
3546         GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3547                                    temp,
3548                                    Address(vh_object, var_type_offset),
3549                                    slow_path,
3550                                    assembler);
3551         GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3552                                    temp,
3553                                    Address(vh_object, var_type_offset),
3554                                    slow_path,
3555                                    assembler);
3556       }
3557       break;
3558     }
3559   }
3560 }
3561 
3562 // This method resolves the field referenced by a field VarHandle (base + offset).
3563 // The return value is the register containing the object's reference (for an instance field)
3564 // or the declaring class (for a static field). The declaring class is stored in the `temp`
3565 // register. The field's offset is loaded into the `offset` register.
GenerateVarHandleFieldReference(HInvoke * invoke,CodeGeneratorX86 * codegen,Register temp,Register offset)3566 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3567                                                 CodeGeneratorX86* codegen,
3568                                                 Register temp,
3569                                                 /*out*/ Register offset) {
3570   X86Assembler* assembler = codegen->GetAssembler();
3571   LocationSummary* locations = invoke->GetLocations();
3572   const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3573   const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3574   const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3575   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3576 
3577   // Load the ArtField and the offset
3578   __ movl(temp, Address(varhandle_object, artfield_offset));
3579   __ movl(offset, Address(temp, offset_offset));
3580   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3581   if (expected_coordinates_count == 0) {
3582     // For static fields, load the declaring class
3583     InstructionCodeGeneratorX86* instr_codegen =
3584         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3585     instr_codegen->GenerateGcRootFieldLoad(invoke,
3586                                            Location::RegisterLocation(temp),
3587                                            Address(temp, declaring_class_offset),
3588                                            /* fixup_label= */ nullptr,
3589                                            kCompilerReadBarrierOption);
3590     return temp;
3591   }
3592 
3593   // For instance fields, return the register containing the object.
3594   DCHECK_EQ(expected_coordinates_count, 1u);
3595 
3596   return locations->InAt(1).AsRegister<Register>();
3597 }
3598 
CreateVarHandleGetLocations(HInvoke * invoke)3599 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3600   // The only read barrier implementation supporting the
3601   // VarHandleGet intrinsic is the Baker-style read barrier.
3602   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3603     return;
3604   }
3605 
3606   if (!IsValidFieldVarHandleExpected(invoke)) {
3607     return;
3608   }
3609 
3610   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3611   LocationSummary* locations = new (allocator) LocationSummary(
3612       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3613   locations->SetInAt(0, Location::RequiresRegister());
3614   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3615   if (expected_coordinates_count == 1u) {
3616     // For instance fields, this is the source object.
3617     locations->SetInAt(1, Location::RequiresRegister());
3618   }
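  // Temporary used for the VarHandle checks and, for static fields, to hold the declaring class.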
3619   locations->AddTemp(Location::RequiresRegister());
3620 
3621   DataType::Type type = invoke->GetType();
3622   switch (DataType::Kind(type)) {
3623     case DataType::Type::kInt64:
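      // The Int64 output is a register pair, so it cannot double as the offset register below;
      // reserve an extra core temporary for the field offset.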
3624       locations->AddTemp(Location::RequiresRegister());
3625       if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3626         // We need an XmmRegister for Int64 to ensure an atomic load
3627         locations->AddTemp(Location::RequiresFpuRegister());
3628       }
3629       FALLTHROUGH_INTENDED;
3630     case DataType::Type::kInt32:
3631     case DataType::Type::kReference:
3632       locations->SetOut(Location::RequiresRegister());
3633       break;
3634     default:
3635       DCHECK(DataType::IsFloatingPointType(type));
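      // A floating-point output cannot hold the field offset, so reserve a core temporary for it.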
3636       locations->AddTemp(Location::RequiresRegister());
3637       locations->SetOut(Location::RequiresFpuRegister());
3638   }
3639 }
3640 
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86 * codegen)3641 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3642   // The only read barrier implementation supporting the
3643   // VarHandleGet intrinsic is the Baker-style read barrier.
3644   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3645 
3646   X86Assembler* assembler = codegen->GetAssembler();
3647   LocationSummary* locations = invoke->GetLocations();
3648   DataType::Type type = invoke->GetType();
3649   DCHECK_NE(type, DataType::Type::kVoid);
3650   Register temp = locations->GetTemp(0).AsRegister<Register>();
3651   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3652   codegen->AddSlowPath(slow_path);
3653 
3654   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3655 
3656   Location out = locations->Out();
3657   // Use 'out' as a temporary register if it's a core register
3658   Register offset =
3659       out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3660 
3661   // Get the field referred by the VarHandle. The returned register contains the object reference
3662   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3663   // declaring class will be placed in 'temp' register.
3664   Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3665   Address field_addr(ref, offset, TIMES_1, 0);
3666 
3667   // Load the value from the field
3668   if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) {
3669     codegen->GenerateReferenceLoadWithBakerReadBarrier(
3670         invoke, out, ref, field_addr, /* needs_null_check= */ false);
3671   } else if (type == DataType::Type::kInt64 &&
3672              invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3673     XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3674     codegen->LoadFromMemoryNoBarrier(type, out, field_addr, xmm_temp, /* is_atomic_load= */ true);
3675   } else {
3676     codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3677   }
3678 
3679   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
3680       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
3681     // Load fence to prevent load-load reordering.
3682     // Note that this is a no-op, thanks to the x86 memory model.
3683     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3684   }
3685 
3686   __ Bind(slow_path->GetExitLabel());
3687 }
3688 
VisitVarHandleGet(HInvoke * invoke)3689 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
3690   CreateVarHandleGetLocations(invoke);
3691 }
3692 
VisitVarHandleGet(HInvoke * invoke)3693 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
3694   GenerateVarHandleGet(invoke, codegen_);
3695 }
3696 
VisitVarHandleGetVolatile(HInvoke * invoke)3697 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3698   CreateVarHandleGetLocations(invoke);
3699 }
3700 
VisitVarHandleGetVolatile(HInvoke * invoke)3701 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3702   GenerateVarHandleGet(invoke, codegen_);
3703 }
3704 
VisitVarHandleGetAcquire(HInvoke * invoke)3705 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3706   CreateVarHandleGetLocations(invoke);
3707 }
3708 
VisitVarHandleGetAcquire(HInvoke * invoke)3709 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3710   GenerateVarHandleGet(invoke, codegen_);
3711 }
3712 
VisitVarHandleGetOpaque(HInvoke * invoke)3713 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3714   CreateVarHandleGetLocations(invoke);
3715 }
3716 
VisitVarHandleGetOpaque(HInvoke * invoke)3717 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3718   GenerateVarHandleGet(invoke, codegen_);
3719 }
3720 
CreateVarHandleSetLocations(HInvoke * invoke)3721 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3722   // The only read barrier implementation supporting the
3723   // VarHandleSet intrinsics is the Baker-style read barrier.
3724   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3725     return;
3726   }
3727 
3728   if (!IsValidFieldVarHandleExpected(invoke)) {
3729     return;
3730   }
3731 
3732   // The last argument should be the value we intend to set.
3733   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3734   HInstruction* value = invoke->InputAt(value_index);
3735   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3736   bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
3737   if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
3738     // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
3739     // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
3740     // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
3741     // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3742     return;
3743   }
3744 
3745   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3746   LocationSummary* locations = new (allocator) LocationSummary(
3747       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3748   locations->SetInAt(0, Location::RequiresRegister());
3749   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3750   if (expected_coordinates_count == 1u) {
3751     // For instance fields, this is the source object
3752     locations->SetInAt(1, Location::RequiresRegister());
3753   }
3754 
3755   switch (value_type) {
3756     case DataType::Type::kBool:
3757     case DataType::Type::kInt8:
3758     case DataType::Type::kUint8:
3759       // Ensure the value is in a byte register
3760       locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
3761       break;
3762     case DataType::Type::kInt16:
3763     case DataType::Type::kUint16:
3764     case DataType::Type::kInt32:
3765       locations->SetInAt(value_index, Location::RegisterOrConstant(value));
3766       break;
3767     case DataType::Type::kInt64:
3768       // We only handle constant non-atomic int64 values.
3769       DCHECK(value->IsConstant());
3770       locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant()));
3771       break;
3772     case DataType::Type::kReference:
3773       locations->SetInAt(value_index, Location::RequiresRegister());
3774       break;
3775     default:
3776       DCHECK(DataType::IsFloatingPointType(value_type));
3777       if (needs_atomicity && value_type == DataType::Type::kFloat64) {
3778         locations->SetInAt(value_index, Location::RequiresFpuRegister());
3779       } else {
3780         locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
3781       }
3782   }
3783 
3784   locations->AddTemp(Location::RequiresRegister());
3785   // This temp is also used as the card register by MarkGCCard, so it must be a byte register.
3786   locations->AddTemp(Location::RegisterLocation(EAX));
3787   if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
3788     // For static reference fields, we need another temporary for the declaring class. We set it
3789     // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
3790     locations->AddTemp(Location::RequiresRegister());
3791   }
3792 }
3793 
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86 * codegen)3794 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3795   // The only read barrier implementation supporting the
3796   // VarHandleSet intrinsics is the Baker-style read barrier.
3797   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3798 
3799   X86Assembler* assembler = codegen->GetAssembler();
3800   LocationSummary* locations = invoke->GetLocations();
3801   // The value we want to set is the last argument
3802   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3803   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3804   Register temp = locations->GetTemp(0).AsRegister<Register>();
3805   Register temp2 = locations->GetTemp(1).AsRegister<Register>();
3806   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3807   codegen->AddSlowPath(slow_path);
3808 
3809   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3810 
3811   // For static reference fields, we need another temporary for the declaring class. But since
3812   // for instance fields the object is in a separate register, it is safe to use the first
3813   // temporary register for GenerateVarHandleFieldReference.
3814   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3815   if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
3816     temp = locations->GetTemp(2).AsRegister<Register>();
3817   }
3818 
3819   Register offset = temp2;
3820   // Get the field referred by the VarHandle. The returned register contains the object reference
3821   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3822   // declaring class will be placed in 'temp' register.
3823   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3824 
3825   bool is_volatile = false;
3826   switch (invoke->GetIntrinsic()) {
3827     case Intrinsics::kVarHandleSet:
3828     case Intrinsics::kVarHandleSetOpaque:
3829       // The only constraint for setOpaque is to ensure bitwise atomicity (atomically set 64-bit
3830       // values), but we don't handle Int64 values because we would need to place them in a
3831       // register pair. If the slow path is taken, the ParallelMove might fail to move the pair
3832       // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3833       break;
3834     case Intrinsics::kVarHandleSetRelease:
3835       // setRelease needs to ensure atomicity too. See the above comment.
3836       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3837       break;
3838     case Intrinsics::kVarHandleSetVolatile:
3839       is_volatile = true;
3840       break;
3841     default:
3842       LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
3843   }
3844 
3845   InstructionCodeGeneratorX86* instr_codegen =
3846         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3847   // Store the value to the field
3848   instr_codegen->HandleFieldSet(invoke,
3849                                 value_index,
3850                                 value_type,
3851                                 Address(reference, offset, TIMES_1, 0),
3852                                 reference,
3853                                 is_volatile,
3854                                 /* value_can_be_null */ true);
3855 
3856   __ Bind(slow_path->GetExitLabel());
3857 }
3858 
VisitVarHandleSet(HInvoke * invoke)3859 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
3860   CreateVarHandleSetLocations(invoke);
3861 }
3862 
VisitVarHandleSet(HInvoke * invoke)3863 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
3864   GenerateVarHandleSet(invoke, codegen_);
3865 }
3866 
VisitVarHandleSetVolatile(HInvoke * invoke)3867 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
3868   CreateVarHandleSetLocations(invoke);
3869 }
3870 
VisitVarHandleSetVolatile(HInvoke * invoke)3871 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
3872   GenerateVarHandleSet(invoke, codegen_);
3873 }
3874 
VisitVarHandleSetRelease(HInvoke * invoke)3875 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
3876   CreateVarHandleSetLocations(invoke);
3877 }
3878 
VisitVarHandleSetRelease(HInvoke * invoke)3879 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
3880   GenerateVarHandleSet(invoke, codegen_);
3881 }
3882 
VisitVarHandleSetOpaque(HInvoke * invoke)3883 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
3884   CreateVarHandleSetLocations(invoke);
3885 }
3886 
VisitVarHandleSetOpaque(HInvoke * invoke)3887 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
3888   GenerateVarHandleSet(invoke, codegen_);
3889 }
3890 
CreateVarHandleGetAndSetLocations(HInvoke * invoke)3891 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
3892   // The only read barrier implementation supporting the
3893   // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
3894   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3895     return;
3896   }
3897 
3898   if (!IsValidFieldVarHandleExpected(invoke)) {
3899     return;
3900   }
3901 
3902   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3903   uint32_t value_index = number_of_arguments - 1;
3904   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3905 
3906   if (DataType::Is64BitType(value_type)) {
3907     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
3908     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
3909     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
3910     // <EAX, EBX> to <EBX, ECX>).
3911     return;
3912   }
3913 
3914   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3915   LocationSummary* locations = new (allocator) LocationSummary(
3916       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
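  // Temporaries for the field offset and for the VarHandle checks (for static fields, the latter
  // ends up holding the declaring class).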
3917   locations->AddTemp(Location::RequiresRegister());
3918   locations->AddTemp(Location::RequiresRegister());
3919   // We use this temporary for the card, so we need a byte register
3920   locations->AddTemp(Location::RegisterLocation(EBX));
3921   locations->SetInAt(0, Location::RequiresRegister());
3922   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
3923     // For instance fields, this is the source object
3924     locations->SetInAt(1, Location::RequiresRegister());
3925   } else {
3926     // For static fields, we need another temp because one will be busy with the declaring class.
3927     locations->AddTemp(Location::RequiresRegister());
3928   }
3929   if (value_type == DataType::Type::kFloat32) {
3930     locations->AddTemp(Location::RegisterLocation(EAX));
3931     locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
3932     locations->SetOut(Location::RequiresFpuRegister());
3933   } else {
3934     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
3935     locations->SetOut(Location::RegisterLocation(EAX));
3936   }
3937 }
3938 
GenerateVarHandleGetAndSet(HInvoke * invoke,CodeGeneratorX86 * codegen)3939 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3940   // The only read barrier implementation supporting the
3941   // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
3942   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3943 
3944   X86Assembler* assembler = codegen->GetAssembler();
3945   LocationSummary* locations = invoke->GetLocations();
3946   // The value we want to set is the last argument
3947   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3948   Location value = locations->InAt(value_index);
3949   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3950   Register temp = locations->GetTemp(1).AsRegister<Register>();
3951   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
3952   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3953   codegen->AddSlowPath(slow_path);
3954 
3955   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3956 
3957   Register offset = locations->GetTemp(0).AsRegister<Register>();
3958   // Get the field referred by the VarHandle. The returned register contains the object reference
3959   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3960   // declaring class will be placed in 'temp' register.
3961   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3962   Address field_addr(reference, offset, TIMES_1, 0);
3963 
3964   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
3965     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3966   }
3967 
3968   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3969   // For static fields, we need another temporary for the declaring class. But since for instance
3970   // fields the object is in a separate register, it is safe to use the first temporary register.
3971   temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
3972   // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
3973   switch (value_type) {
3974     case DataType::Type::kBool:
3975       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
3976       __ movzxb(locations->Out().AsRegister<Register>(),
3977                 locations->Out().AsRegister<ByteRegister>());
3978       break;
3979     case DataType::Type::kInt8:
3980       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
3981       __ movsxb(locations->Out().AsRegister<Register>(),
3982                 locations->Out().AsRegister<ByteRegister>());
3983       break;
3984     case DataType::Type::kUint16:
3985       __ xchgw(value.AsRegister<Register>(), field_addr);
3986       __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
3987       break;
3988     case DataType::Type::kInt16:
3989       __ xchgw(value.AsRegister<Register>(), field_addr);
3990       __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
3991       break;
3992     case DataType::Type::kInt32:
3993       __ xchgl(value.AsRegister<Register>(), field_addr);
3994       break;
3995     case DataType::Type::kFloat32:
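      // Exchange through EAX: move the float bits into EAX, xchg with the field, then move the
      // old bits into the FP output register.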
3996       codegen->Move32(Location::RegisterLocation(EAX), value);
3997       __ xchgl(EAX, field_addr);
3998       __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
3999       break;
4000     case DataType::Type::kReference: {
4001       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4002         // Need to make sure the reference stored in the field is a to-space
4003         // one before attempting the CAS or the CAS could fail incorrectly.
4004         codegen->GenerateReferenceLoadWithBakerReadBarrier(
4005             invoke,
4006             // Unused, used only as a "temporary" within the read barrier.
4007             Location::RegisterLocation(temp),
4008             reference,
4009             field_addr,
4010             /* needs_null_check= */ false,
4011             /* always_update_field= */ true,
4012             &temp2);
4013       }
4014       codegen->MarkGCCard(
4015           temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false);
4016       if (kPoisonHeapReferences) {
4017         __ movl(temp, value.AsRegister<Register>());
4018         __ PoisonHeapReference(temp);
4019         __ xchgl(temp, field_addr);
4020         __ UnpoisonHeapReference(temp);
4021         __ movl(locations->Out().AsRegister<Register>(), temp);
4022       } else {
4023         __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4024       }
4025       break;
4026     }
4027     default:
4028       UNREACHABLE();
4029   }
4030 
4031   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4032     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4033   }
4034 
4035   __ Bind(slow_path->GetExitLabel());
4036 }
4037 
VisitVarHandleGetAndSet(HInvoke * invoke)4038 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4039   CreateVarHandleGetAndSetLocations(invoke);
4040 }
4041 
VisitVarHandleGetAndSet(HInvoke * invoke)4042 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4043   GenerateVarHandleGetAndSet(invoke, codegen_);
4044 }
4045 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4046 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4047   CreateVarHandleGetAndSetLocations(invoke);
4048 }
4049 
VisitVarHandleGetAndSetAcquire(HInvoke * invoke)4050 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4051   GenerateVarHandleGetAndSet(invoke, codegen_);
4052 }
4053 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4054 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4055   CreateVarHandleGetAndSetLocations(invoke);
4056 }
4057 
VisitVarHandleGetAndSetRelease(HInvoke * invoke)4058 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4059   GenerateVarHandleGetAndSet(invoke, codegen_);
4060 }
4061 
CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke * invoke)4062 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4063   // The only read barrier implementation supporting the
4064   // VarHandle compare-and-set/compare-and-exchange intrinsics is the Baker-style read barrier.
4065   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4066     return;
4067   }
4068 
4069   if (!IsValidFieldVarHandleExpected(invoke)) {
4070     return;
4071   }
4072 
4073   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4074   uint32_t expected_value_index = number_of_arguments - 2;
4075   uint32_t new_value_index = number_of_arguments - 1;
4076   DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4077   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4078 
4079   if (DataType::Is64BitType(value_type)) {
4080     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4081     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4082     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4083     // <EAX, EBX> to <EBX, ECX>).
4084     return;
4085   }
4086 
4087   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4088   LocationSummary* locations = new (allocator) LocationSummary(
4089       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
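  // Temporaries for the field offset and for the VarHandle checks / declaring class.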
4090   locations->AddTemp(Location::RequiresRegister());
4091   locations->AddTemp(Location::RequiresRegister());
4092   // We use this temporary for the card, so we need a byte register
4093   locations->AddTemp(Location::RegisterLocation(EBX));
4094   locations->SetInAt(0, Location::RequiresRegister());
4095   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4096     // For instance fields, this is the source object
4097     locations->SetInAt(1, Location::RequiresRegister());
4098   } else {
4099     // For static fields, we need another temp because one will be busy with the declaring class.
4100     locations->AddTemp(Location::RequiresRegister());
4101   }
4102   if (DataType::IsFloatingPointType(value_type)) {
4103     // We need EAX for placing the expected value
4104     locations->AddTemp(Location::RegisterLocation(EAX));
4105     locations->SetInAt(new_value_index,
4106                        Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4107     locations->SetInAt(expected_value_index,
4108                        Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4109   } else {
4110     // Ensure it's in a byte register
4111     locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
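    // `lock cmpxchg` takes the expected value in EAX and leaves the old value in EAX.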
4112     locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4113   }
4114 
4115   mirror::VarHandle::AccessModeTemplate access_mode_template =
4116       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4117 
4118   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4119       value_type == DataType::Type::kFloat32) {
4120     locations->SetOut(Location::RequiresFpuRegister());
4121   } else {
4122     locations->SetOut(Location::RegisterLocation(EAX));
4123   }
4124 }
4125 
GenerateVarHandleCompareAndSetOrExchange(HInvoke * invoke,CodeGeneratorX86 * codegen)4126 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4127   // The only read barrier implementation supporting the
4128   // VarHandle compare-and-set/compare-and-exchange intrinsics is the Baker-style read barrier.
4129   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4130 
4131   X86Assembler* assembler = codegen->GetAssembler();
4132   LocationSummary* locations = invoke->GetLocations();
4133   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4134   uint32_t expected_value_index = number_of_arguments - 2;
4135   uint32_t new_value_index = number_of_arguments - 1;
4136   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4137   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4138   Location expected_value = locations->InAt(expected_value_index);
4139   Location new_value = locations->InAt(new_value_index);
4140   Register offset = locations->GetTemp(0).AsRegister<Register>();
4141   Register temp = locations->GetTemp(1).AsRegister<Register>();
4142   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4143   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4144   codegen->AddSlowPath(slow_path);
4145 
4146   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4147 
4148   // Get the field referred by the VarHandle. The returned register contains the object reference
4149   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4150   // declaring class will be placed in 'temp' register.
4151   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4152 
4153   uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4154   // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4155   // first temporary contains the declaring class so we need another temporary. In case of an
4156   // instance field, the object comes in a separate register so it's safe to use the first temp.
4157   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4158   DCHECK_NE(temp, reference);
4159 
4160   // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4161   // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4162   // barriers at this time.
4163 
4164   mirror::VarHandle::AccessModeTemplate access_mode_template =
4165       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4166   bool is_cmpxchg =
4167       access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
4168 
4169   if (type == DataType::Type::kReference) {
4170     GenReferenceCAS(
4171         invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4172   } else {
4173     Location out = locations->Out();
4174     GenPrimitiveCAS(
4175         type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4176   }
4177 
4178   __ Bind(slow_path->GetExitLabel());
4179 }
4180 
VisitVarHandleCompareAndSet(HInvoke * invoke)4181 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4182   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4183 }
4184 
VisitVarHandleCompareAndSet(HInvoke * invoke)4185 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4186   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4187 }
4188 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4189 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4190   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4191 }
4192 
VisitVarHandleWeakCompareAndSet(HInvoke * invoke)4193 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4194   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4195 }
4196 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4197 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4198   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4199 }
4200 
VisitVarHandleWeakCompareAndSetPlain(HInvoke * invoke)4201 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4202   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4203 }
4204 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4205 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4206   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4207 }
4208 
VisitVarHandleWeakCompareAndSetAcquire(HInvoke * invoke)4209 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4210   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4211 }
4212 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4213 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4214   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4215 }
4216 
VisitVarHandleWeakCompareAndSetRelease(HInvoke * invoke)4217 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4218   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4219 }
4220 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4221 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4222   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4223 }
4224 
VisitVarHandleCompareAndExchange(HInvoke * invoke)4225 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4226   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4227 }
4228 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4229 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4230   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4231 }
4232 
VisitVarHandleCompareAndExchangeAcquire(HInvoke * invoke)4233 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4234   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4235 }
4236 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4237 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4238   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4239 }
4240 
VisitVarHandleCompareAndExchangeRelease(HInvoke * invoke)4241 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4242   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4243 }
4244 
CreateVarHandleGetAndAddLocations(HInvoke * invoke)4245 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4246   // The only read barrier implementation supporting the
4247   // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4248   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4249     return;
4250   }
4251 
4252   if (!IsValidFieldVarHandleExpected(invoke)) {
4253     return;
4254   }
4255 
4256   // The last argument should be the value we intend to set.
4257   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4258   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4259   if (DataType::Is64BitType(value_type)) {
4260     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4261     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4262     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4263     // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4264     return;
4265   }
4266 
4267   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4268   LocationSummary* locations = new (allocator) LocationSummary(
4269       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
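  // Temporaries for the VarHandle checks and for the field offset.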
4270   locations->AddTemp(Location::RequiresRegister());
4271   locations->AddTemp(Location::RequiresRegister());
4272   locations->SetInAt(0, Location::RequiresRegister());
4273   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4274   if (expected_coordinates_count == 1u) {
4275     // For instance fields, this is the source object
4276     locations->SetInAt(1, Location::RequiresRegister());
4277   } else {
4278     // For static fields, we need another temp because one will be busy with the declaring class.
4279     locations->AddTemp(Location::RequiresRegister());
4280   }
4281 
4282   if (DataType::IsFloatingPointType(value_type)) {
4283     locations->AddTemp(Location::RequiresFpuRegister());
4284     locations->AddTemp(Location::RegisterLocation(EAX));
4285     locations->SetInAt(value_index, Location::RequiresFpuRegister());
4286     locations->SetOut(Location::RequiresFpuRegister());
4287   } else {
4288     // xadd updates the register argument with the old value. ByteRegister required for xaddb.
4289     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4290     locations->SetOut(Location::RegisterLocation(EAX));
4291   }
4292 }
4293 
GenerateVarHandleGetAndAdd(HInvoke * invoke,CodeGeneratorX86 * codegen)4294 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4295   // The only read barrier implementation supporting the
4296   // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4297   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4298 
4299   X86Assembler* assembler = codegen->GetAssembler();
4300   LocationSummary* locations = invoke->GetLocations();
4301   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4302   uint32_t value_index = number_of_arguments - 1;
4303   DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4304   DCHECK_EQ(type, invoke->GetType());
4305   Location value_loc = locations->InAt(value_index);
4306   Register temp = locations->GetTemp(0).AsRegister<Register>();
4307   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4308   codegen->AddSlowPath(slow_path);
4309 
4310   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4311 
4312   Register offset = locations->GetTemp(1).AsRegister<Register>();
4313   // Get the field referred by the VarHandle. The returned register contains the object reference
4314   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4315   // declaring class will be placed in 'temp' register.
4316   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4317 
4318   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4319   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4320   DCHECK_NE(temp, reference);
4321   Address field_addr(reference, offset, TIMES_1, 0);
4322 
4323   switch (type) {
4324     case DataType::Type::kInt8:
4325       __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4326       __ movsxb(locations->Out().AsRegister<Register>(),
4327                 locations->Out().AsRegister<ByteRegister>());
4328       break;
4329     case DataType::Type::kInt16:
4330       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4331       __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4332       break;
4333     case DataType::Type::kUint16:
4334       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4335       __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4336       break;
4337     case DataType::Type::kInt32:
4338       __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4339       break;
4340     case DataType::Type::kFloat32: {
4341       Location temp_float =
4342           (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4343       DCHECK(temp_float.IsFpuRegister());
4344       Location eax = Location::RegisterLocation(EAX);
4345       NearLabel try_again;
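      // There is no atomic floating-point add, so use a CAS loop: load the current value, add the
      // operand, and retry the locked cmpxchg until it succeeds.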
4346       __ Bind(&try_again);
4347       __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4348       __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4349       __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4350                value_loc.AsFpuRegister<XmmRegister>());
4351       GenPrimitiveLockedCmpxchg(type,
4352                                 codegen,
4353                                 /* expected_value= */ eax,
4354                                 /* new_value= */ temp_float,
4355                                 reference,
4356                                 offset,
4357                                 temp);
4358       __ j(kNotZero, &try_again);
4359 
4360       // The old value is present in EAX.
4361       codegen->Move32(locations->Out(), eax);
4362       break;
4363     }
4364     default:
4365       UNREACHABLE();
4366   }
4367 
4368   __ Bind(slow_path->GetExitLabel());
4369 }
4370 
VisitVarHandleGetAndAdd(HInvoke * invoke)4371 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4372   CreateVarHandleGetAndAddLocations(invoke);
4373 }
4374 
VisitVarHandleGetAndAdd(HInvoke * invoke)4375 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4376   GenerateVarHandleGetAndAdd(invoke, codegen_);
4377 }
4378 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4379 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4380   CreateVarHandleGetAndAddLocations(invoke);
4381 }
4382 
VisitVarHandleGetAndAddAcquire(HInvoke * invoke)4383 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4384   GenerateVarHandleGetAndAdd(invoke, codegen_);
4385 }
4386 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4387 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4388   CreateVarHandleGetAndAddLocations(invoke);
4389 }
4390 
VisitVarHandleGetAndAddRelease(HInvoke * invoke)4391 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4392   GenerateVarHandleGetAndAdd(invoke, codegen_);
4393 }
4394 
CreateVarHandleGetAndBitwiseOpLocations(HInvoke * invoke)4395 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
4396   // The only read barrier implementation supporting the
4397   // VarHandleGetAndBitwise* intrinsics is the Baker-style read barrier.
4398   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4399     return;
4400   }
4401 
4402   if (!IsValidFieldVarHandleExpected(invoke)) {
4403     return;
4404   }
4405 
4406   // The last argument should be the value we intend to set.
4407   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4408   if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
4409     // We avoid the case of an Int64 value because we would need to place it in a register pair.
4410     // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4411     // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4412     // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4413     return;
4414   }
4415 
4416   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4417   LocationSummary* locations = new (allocator) LocationSummary(
4418       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4419   // We need a byte register temp to store the result of the bitwise operation
4420   locations->AddTemp(Location::RegisterLocation(EBX));
4421   locations->AddTemp(Location::RequiresRegister());
4422   locations->SetInAt(0, Location::RequiresRegister());
4423   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4424   if (expected_coordinates_count == 1u) {
4425     // For instance fields, this is the source object
4426     locations->SetInAt(1, Location::RequiresRegister());
4427   } else {
4428     // For static fields, we need another temp because one will be busy with the declaring class.
4429     locations->AddTemp(Location::RequiresRegister());
4430   }
4431 
4432   locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
4433   locations->SetOut(Location::RegisterLocation(EAX));
4434 }
4435 
GenerateBitwiseOp(HInvoke * invoke,CodeGeneratorX86 * codegen,Register left,Register right)4436 static void GenerateBitwiseOp(HInvoke* invoke,
4437                               CodeGeneratorX86* codegen,
4438                               Register left,
4439                               Register right) {
4440   X86Assembler* assembler = codegen->GetAssembler();
4441 
4442   switch (invoke->GetIntrinsic()) {
4443     case Intrinsics::kVarHandleGetAndBitwiseOr:
4444     case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
4445     case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
4446       __ orl(left, right);
4447       break;
4448     case Intrinsics::kVarHandleGetAndBitwiseXor:
4449     case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
4450     case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
4451       __ xorl(left, right);
4452       break;
4453     case Intrinsics::kVarHandleGetAndBitwiseAnd:
4454     case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
4455     case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
4456       __ andl(left, right);
4457       break;
4458     default:
4459       UNREACHABLE();
4460   }
4461 }
4462 
GenerateVarHandleGetAndBitwiseOp(HInvoke * invoke,CodeGeneratorX86 * codegen)4463 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
4464   // The only read barrier implementation supporting the
4465   // VarHandleGetAndBitwise* intrinsics is the Baker-style read barrier.
4466   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4467 
4468   X86Assembler* assembler = codegen->GetAssembler();
4469   LocationSummary* locations = invoke->GetLocations();
4470   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4471   DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4472   DCHECK_EQ(type, invoke->GetType());
4473   Register temp = locations->GetTemp(0).AsRegister<Register>();
4474   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4475   codegen->AddSlowPath(slow_path);
4476 
4477   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4478 
4479   Register offset = locations->GetTemp(1).AsRegister<Register>();
4480   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4481   // For a static field, we need another temporary to hold the declaring class.
4482   Register reference =
4483       (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4484   // Get the field referred by the VarHandle. The returned register contains the object reference
4485   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4486   // declaring class will be placed in 'reference' register.
4487   reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
4488   DCHECK_NE(temp, reference);
4489   Address field_addr(reference, offset, TIMES_1, 0);
4490 
4491   Register out = locations->Out().AsRegister<Register>();
4492   DCHECK_EQ(out, EAX);
4493 
4494   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
4495       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
4496       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
4497     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4498   }
4499 
4500   NearLabel try_again;
4501   __ Bind(&try_again);
4502   // Place the expected value in EAX for cmpxchg
4503   codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
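  // Copy the operand into the temporary and combine it with the loaded value; the result is the
  // new value for the locked cmpxchg below.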
4504   codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
4505   GenerateBitwiseOp(invoke, codegen, temp, out);
4506   GenPrimitiveLockedCmpxchg(type,
4507                             codegen,
4508                             /* expected_value= */ locations->Out(),
4509                             /* new_value= */ locations->GetTemp(0),
4510                             reference,
4511                             offset);
4512   // If the cmpxchg failed, another thread changed the value so try again.
4513   __ j(kNotZero, &try_again);
4514 
4515   // The old value is present in EAX.
4516 
4517   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
4518       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
4519       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
4520     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4521   }
4522 
4523   __ Bind(slow_path->GetExitLabel());
4524 }
4525 
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

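// The intrinsics below have no x86-specific fast path: UNIMPLEMENTED_INTRINSIC provides stub
// visitors, so these invokes are not intrinsified and go through the regular call path.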
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned)
UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)

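// Intrinsics that are expected to be converted to HIR earlier in the compilation pipeline
// (e.g. by the instruction simplifier); reaching one of these visitors in the backend would
// indicate a bug.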
UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art