1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86.h"
18 
19 #include <limits>
20 
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "data_type-inl.h"
26 #include "entrypoints/quick/quick_entrypoints.h"
27 #include "heap_poisoning.h"
28 #include "intrinsic_objects.h"
29 #include "intrinsics.h"
30 #include "intrinsics_utils.h"
31 #include "lock_word.h"
32 #include "mirror/array-inl.h"
33 #include "mirror/object_array-inl.h"
34 #include "mirror/reference.h"
35 #include "mirror/string.h"
36 #include "mirror/var_handle.h"
37 #include "scoped_thread_state_change-inl.h"
38 #include "thread-current-inl.h"
39 #include "utils/x86/assembler_x86.h"
40 #include "utils/x86/constants_x86.h"
41 #include "well_known_classes.h"
42 
43 namespace art HIDDEN {
44 
45 namespace x86 {
46 
47 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
48   : allocator_(codegen->GetGraph()->GetAllocator()),
49     codegen_(codegen) {
50 }
51 
52 
53 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
54   return down_cast<X86Assembler*>(codegen_->GetAssembler());
55 }
56 
57 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
58   return codegen_->GetGraph()->GetAllocator();
59 }
60 
61 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
62   Dispatch(invoke);
63   LocationSummary* res = invoke->GetLocations();
64   if (res == nullptr) {
65     return false;
66   }
67   return res->Intrinsified();
68 }
69 
70 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
71 
72 #define __ assembler->
73 
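// Computes `dest = base + data_offset + pos * sizeof(type)` with a single LEA,
// using either a constant displacement or a scaled register index.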
74 static void GenArrayAddress(X86Assembler* assembler,
75                             Register dest,
76                             Register base,
77                             Location pos,
78                             DataType::Type type,
79                             uint32_t data_offset) {
80   if (pos.IsConstant()) {
81     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
82     __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
83   } else {
84     const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
85     __ leal(dest, Address(base, pos.AsRegister<Register>(), scale_factor, data_offset));
86   }
87 }
88 
89 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
90 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
91  public:
92   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
93       : SlowPathCode(instruction) {
94   }
95 
96   void EmitNativeCode(CodeGenerator* codegen) override {
97     DCHECK(codegen->EmitBakerReadBarrier());
98     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
99     X86Assembler* assembler = x86_codegen->GetAssembler();
100     LocationSummary* locations = instruction_->GetLocations();
101     DCHECK(locations->CanCall());
102     DCHECK(instruction_->IsInvokeStaticOrDirect())
103         << "Unexpected instruction in read barrier arraycopy slow path: "
104         << instruction_->DebugName();
105     DCHECK(instruction_->GetLocations()->Intrinsified());
106     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
107     Location length = locations->InAt(4);
108 
109     const DataType::Type type = DataType::Type::kReference;
110     const int32_t element_size = DataType::Size(type);
111 
112     Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
113     Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
114     Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
115     Register value = locations->GetTemp(3).AsRegister<Register>();
116 
117     __ Bind(GetEntryLabel());
118     // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
119     GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
120 
121     NearLabel loop;
122     __ Bind(&loop);
123     __ movl(value, Address(src_curr_addr, 0));
124     __ MaybeUnpoisonHeapReference(value);
125     // TODO: Inline the mark bit check before calling the runtime?
126     // value = ReadBarrier::Mark(value)
127     // No need to save live registers; it's taken care of by the
128     // entrypoint. Also, there is no need to update the stack mask,
129     // as this runtime call will not trigger a garbage collection.
130     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
131     // explanations.)
132     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(value);
133     // This runtime call does not require a stack map.
134     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
135     __ MaybePoisonHeapReference(value);
136     __ movl(Address(dst_curr_addr, 0), value);
137     __ addl(src_curr_addr, Immediate(element_size));
138     __ addl(dst_curr_addr, Immediate(element_size));
139     __ cmpl(src_curr_addr, src_stop_addr);
140     __ j(kNotEqual, &loop);
141     __ jmp(GetExitLabel());
142   }
143 
144   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
145 
146  private:
147   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
148 };
149 
150 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
151   LocationSummary* locations =
152       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
153   locations->SetInAt(0, Location::RequiresFpuRegister());
154   locations->SetOut(Location::RequiresRegister());
155   if (is64bit) {
156     locations->AddTemp(Location::RequiresFpuRegister());
157   }
158 }
159 
160 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
161   LocationSummary* locations =
162       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
163   locations->SetInAt(0, Location::RequiresRegister());
164   locations->SetOut(Location::RequiresFpuRegister());
165   if (is64bit) {
166     locations->AddTemp(Location::RequiresFpuRegister());
167     locations->AddTemp(Location::RequiresFpuRegister());
168   }
169 }
170 
171 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
172   Location input = locations->InAt(0);
173   Location output = locations->Out();
174   if (is64bit) {
175     // Need to use the temporary.
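    // MOVD extracts the low 32 bits; PSRLQ shifts the high half down so a second
    // MOVD can extract it into the high register of the output pair.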
176     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
177     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
178     __ movd(output.AsRegisterPairLow<Register>(), temp);
179     __ psrlq(temp, Immediate(32));
180     __ movd(output.AsRegisterPairHigh<Register>(), temp);
181   } else {
182     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
183   }
184 }
185 
186 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
187   Location input = locations->InAt(0);
188   Location output = locations->Out();
189   if (is64bit) {
190     // Need to use the temporary.
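    // Load each 32-bit half into an XMM temporary and interleave them with
    // PUNPCKLDQ to form the full 64-bit value.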
191     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
192     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
193     __ movd(temp1, input.AsRegisterPairLow<Register>());
194     __ movd(temp2, input.AsRegisterPairHigh<Register>());
195     __ punpckldq(temp1, temp2);
196     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
197   } else {
198     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
199   }
200 }
201 
202 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
203   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
204 }
205 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
206   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
207 }
208 
209 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
210   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
211 }
212 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
213   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
214 }
215 
216 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
217   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
218 }
219 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
220   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
221 }
222 
223 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
224   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
225 }
226 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
227   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
228 }
229 
230 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
231   LocationSummary* locations =
232       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
233   locations->SetInAt(0, Location::RequiresRegister());
234   locations->SetOut(Location::SameAsFirstInput());
235 }
236 
237 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
238   LocationSummary* locations =
239       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
240   locations->SetInAt(0, Location::RequiresRegister());
241   locations->SetOut(Location::RequiresRegister());
242 }
243 
244 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
245   LocationSummary* locations =
246       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
247   locations->SetInAt(0, Location::RequiresRegister());
248   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
249 }
250 
251 static void GenReverseBytes(LocationSummary* locations,
252                             DataType::Type size,
253                             X86Assembler* assembler) {
254   Register out = locations->Out().AsRegister<Register>();
255 
256   switch (size) {
257     case DataType::Type::kInt16:
258       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
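      // BSWAP moves the byte-swapped short into the upper half of the register;
      // the arithmetic shift brings it back down and sign-extends the result.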
259       __ bswapl(out);
260       __ sarl(out, Immediate(16));
261       break;
262     case DataType::Type::kInt32:
263       __ bswapl(out);
264       break;
265     default:
266       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
267       UNREACHABLE();
268   }
269 }
270 
271 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
272   CreateIntToIntLocations(allocator_, invoke);
273 }
274 
275 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
276   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
277 }
278 
279 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
280   CreateLongToLongLocations(allocator_, invoke);
281 }
282 
283 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
284   LocationSummary* locations = invoke->GetLocations();
285   Location input = locations->InAt(0);
286   Register input_lo = input.AsRegisterPairLow<Register>();
287   Register input_hi = input.AsRegisterPairHigh<Register>();
288   Location output = locations->Out();
289   Register output_lo = output.AsRegisterPairLow<Register>();
290   Register output_hi = output.AsRegisterPairHigh<Register>();
291 
292   X86Assembler* assembler = GetAssembler();
293   // Assign the inputs to the outputs, mixing low/high.
294   __ movl(output_lo, input_hi);
295   __ movl(output_hi, input_lo);
296   __ bswapl(output_lo);
297   __ bswapl(output_hi);
298 }
299 
300 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
301   CreateIntToIntLocations(allocator_, invoke);
302 }
303 
304 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
305   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
306 }
307 
308 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
309   LocationSummary* locations =
310       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
311   locations->SetInAt(0, Location::RequiresFpuRegister());
312   locations->SetOut(Location::RequiresFpuRegister());
313 }
314 
315 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
316   CreateFPToFPLocations(allocator_, invoke);
317 }
318 
319 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
320   LocationSummary* locations = invoke->GetLocations();
321   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
322   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
323 
324   GetAssembler()->sqrtsd(out, in);
325 }
326 
327 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
328                                        HInvoke* invoke,
329                                        CodeGeneratorX86* codegen) {
330   // Do we have instruction support?
331   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
332     return;
333   }
334 
335   CreateFPToFPLocations(allocator, invoke);
336 }
337 
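// The ROUNDSD immediate selects the rounding mode: 0 rounds to nearest (rint),
// 1 rounds toward negative infinity (floor), 2 rounds toward positive infinity (ceil).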
338 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
339   LocationSummary* locations = invoke->GetLocations();
340   DCHECK(!locations->WillCall());
341   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
342   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
343   __ roundsd(out, in, Immediate(round_mode));
344 }
345 
346 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
347   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
348 }
349 
350 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
351   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
352 }
353 
354 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
355   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
356 }
357 
358 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
359   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
360 }
361 
362 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
363   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
364 }
365 
366 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
367   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
368 }
369 
370 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
371   // Do we have instruction support?
372   if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
373     return;
374   }
375 
376   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
377   LocationSummary* locations =
378       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
379   locations->SetInAt(0, Location::RequiresFpuRegister());
380   if (static_or_direct->HasSpecialInput() &&
381       invoke->InputAt(
382           static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
383     locations->SetInAt(1, Location::RequiresRegister());
384   }
385   locations->SetOut(Location::RequiresRegister());
386   locations->AddTemp(Location::RequiresFpuRegister());
387   locations->AddTemp(Location::RequiresFpuRegister());
388 }
389 
390 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
391   LocationSummary* locations = invoke->GetLocations();
392   DCHECK(!locations->WillCall());
393 
394   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
395   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
396   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
397   Register out = locations->Out().AsRegister<Register>();
398   NearLabel skip_incr, done;
399   X86Assembler* assembler = GetAssembler();
400 
401   // Since no direct x86 rounding instruction matches the required semantics,
402   // this intrinsic is implemented as follows:
403   //  result = floor(in);
404   //  if (in - result >= 0.5f)
405   //    result = result + 1.0f;
406   __ movss(t2, in);
407   __ roundss(t1, in, Immediate(1));
408   __ subss(t2, t1);
409   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
410     // Direct constant area available.
411     HX86ComputeBaseMethodAddress* method_address =
412         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
413     Register constant_area = locations->InAt(1).AsRegister<Register>();
414     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
415                                                 method_address,
416                                                 constant_area));
417     __ j(kBelow, &skip_incr);
418     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
419                                                method_address,
420                                                constant_area));
421     __ Bind(&skip_incr);
422   } else {
423     // No constant area: go through stack.
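    // After the two pushes below, 0.5f is at ESP + 4 and 1.0f at ESP + 0.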
424     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
425     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
426     __ comiss(t2, Address(ESP, 4));
427     __ j(kBelow, &skip_incr);
428     __ addss(t1, Address(ESP, 0));
429     __ Bind(&skip_incr);
430     __ addl(ESP, Immediate(8));
431   }
432 
433   // Final conversion to an integer. Unfortunately this also does not have a
434   // direct x86 instruction, since NaN should map to 0 and large positive
435   // values need to be clipped to the extreme value.
436   __ movl(out, Immediate(kPrimIntMax));
437   __ cvtsi2ss(t2, out);
438   __ comiss(t1, t2);
439   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
440   __ movl(out, Immediate(0));  // does not change flags
441   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
442   __ cvttss2si(out, t1);
443   __ Bind(&done);
444 }
445 
446 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
447   LocationSummary* locations =
448       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
449   InvokeRuntimeCallingConvention calling_convention;
450   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
451   locations->SetOut(Location::FpuRegisterLocation(XMM0));
452 }
453 
454 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
455   LocationSummary* locations = invoke->GetLocations();
456   DCHECK(locations->WillCall());
457   DCHECK(invoke->IsInvokeStaticOrDirect());
458   X86Assembler* assembler = codegen->GetAssembler();
459 
460   // We need some place to pass the parameters.
461   __ subl(ESP, Immediate(16));
462   __ cfi().AdjustCFAOffset(16);
463 
464   // Pass the parameters at the bottom of the stack.
465   __ movsd(Address(ESP, 0), XMM0);
466 
467   // If we have a second parameter, pass it next.
468   if (invoke->GetNumberOfArguments() == 2) {
469     __ movsd(Address(ESP, 8), XMM1);
470   }
471 
472   // Now do the actual call.
473   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
474 
475   // Extract the return value from the FP stack.
476   __ fstpl(Address(ESP, 0));
477   __ movsd(XMM0, Address(ESP, 0));
478 
479   // And clean up the stack.
480   __ addl(ESP, Immediate(16));
481   __ cfi().AdjustCFAOffset(-16);
482 }
483 
484 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
485   LocationSummary* locations =
486       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
487   if (is_long) {
488     locations->SetInAt(0, Location::RequiresRegister());
489   } else {
490     locations->SetInAt(0, Location::Any());
491   }
492   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
493 }
494 
495 static void GenLowestOneBit(X86Assembler* assembler,
496                       CodeGeneratorX86* codegen,
497                       bool is_long,
498                       HInvoke* invoke) {
499   LocationSummary* locations = invoke->GetLocations();
500   Location src = locations->InAt(0);
501   Location out_loc = locations->Out();
502 
503   if (invoke->InputAt(0)->IsConstant()) {
504     // Evaluate this at compile time.
505     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
506     if (value == 0) {
507       if (is_long) {
508         __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
509         __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
510       } else {
511         __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
512       }
513       return;
514     }
515     // Nonzero value.
516     value = is_long ? CTZ(static_cast<uint64_t>(value))
517                     : CTZ(static_cast<uint32_t>(value));
518     if (is_long) {
519       if (value >= 32) {
520         int shift = value-32;
521         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
522         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
523       } else {
524         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
525         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
526       }
527     } else {
528       codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
529     }
530     return;
531   }
532   // Handle non constant case
533   if (is_long) {
534     DCHECK(src.IsRegisterPair());
535     Register src_lo = src.AsRegisterPairLow<Register>();
536     Register src_hi = src.AsRegisterPairHigh<Register>();
537 
538     Register out_lo = out_loc.AsRegisterPairLow<Register>();
539     Register out_hi = out_loc.AsRegisterPairHigh<Register>();
540 
541     __ movl(out_lo, src_lo);
542     __ movl(out_hi, src_hi);
543 
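    // Negate the 64-bit value across the register pair (negate low, propagate the
    // borrow, negate high); `x & -x` then isolates the lowest set bit.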
544     __ negl(out_lo);
545     __ adcl(out_hi, Immediate(0));
546     __ negl(out_hi);
547 
548     __ andl(out_lo, src_lo);
549     __ andl(out_hi, src_hi);
550   } else {
551     if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
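      // BLSI computes `src & -src` (the lowest set bit) in a single instruction.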
552       Register out = out_loc.AsRegister<Register>();
553       __ blsi(out, src.AsRegister<Register>());
554     } else {
555       Register out = out_loc.AsRegister<Register>();
556       // Do tmp & -tmp
557       if (src.IsRegister()) {
558         __ movl(out, src.AsRegister<Register>());
559       } else {
560         DCHECK(src.IsStackSlot());
561         __ movl(out, Address(ESP, src.GetStackIndex()));
562       }
563       __ negl(out);
564 
565       if (src.IsRegister()) {
566         __ andl(out, src.AsRegister<Register>());
567       } else {
568         __ andl(out, Address(ESP, src.GetStackIndex()));
569       }
570     }
571   }
572 }
573 
574 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
575   CreateFPToFPCallLocations(allocator_, invoke);
576 }
577 
578 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
579   GenFPToFPCall(invoke, codegen_, kQuickCos);
580 }
581 
582 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
583   CreateFPToFPCallLocations(allocator_, invoke);
584 }
585 
586 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
587   GenFPToFPCall(invoke, codegen_, kQuickSin);
588 }
589 
590 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
591   CreateFPToFPCallLocations(allocator_, invoke);
592 }
593 
594 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
595   GenFPToFPCall(invoke, codegen_, kQuickAcos);
596 }
597 
598 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
599   CreateFPToFPCallLocations(allocator_, invoke);
600 }
601 
602 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
603   GenFPToFPCall(invoke, codegen_, kQuickAsin);
604 }
605 
606 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
607   CreateFPToFPCallLocations(allocator_, invoke);
608 }
609 
610 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
611   GenFPToFPCall(invoke, codegen_, kQuickAtan);
612 }
613 
614 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
615   CreateFPToFPCallLocations(allocator_, invoke);
616 }
617 
618 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
619   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
620 }
621 
622 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
623   CreateFPToFPCallLocations(allocator_, invoke);
624 }
625 
626 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
627   GenFPToFPCall(invoke, codegen_, kQuickCosh);
628 }
629 
630 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
631   CreateFPToFPCallLocations(allocator_, invoke);
632 }
633 
634 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
635   GenFPToFPCall(invoke, codegen_, kQuickExp);
636 }
637 
638 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
639   CreateFPToFPCallLocations(allocator_, invoke);
640 }
641 
642 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
643   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
644 }
645 
646 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
647   CreateFPToFPCallLocations(allocator_, invoke);
648 }
649 
650 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
651   GenFPToFPCall(invoke, codegen_, kQuickLog);
652 }
653 
654 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
655   CreateFPToFPCallLocations(allocator_, invoke);
656 }
657 
658 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
659   GenFPToFPCall(invoke, codegen_, kQuickLog10);
660 }
661 
662 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
663   CreateFPToFPCallLocations(allocator_, invoke);
664 }
665 
666 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
667   GenFPToFPCall(invoke, codegen_, kQuickSinh);
668 }
669 
670 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
671   CreateFPToFPCallLocations(allocator_, invoke);
672 }
673 
674 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
675   GenFPToFPCall(invoke, codegen_, kQuickTan);
676 }
677 
678 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
679   CreateFPToFPCallLocations(allocator_, invoke);
680 }
681 
682 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
683   GenFPToFPCall(invoke, codegen_, kQuickTanh);
684 }
685 
686 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
687   CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
688 }
689 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
690   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
691 }
692 
693 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
694   CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
695 }
696 
697 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
698   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
699 }
700 
701 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
702   LocationSummary* locations =
703       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
704   InvokeRuntimeCallingConvention calling_convention;
705   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
706   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
707   locations->SetOut(Location::FpuRegisterLocation(XMM0));
708 }
709 
710 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
711   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
712   LocationSummary* locations =
713       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
714   InvokeRuntimeCallingConvention calling_convention;
715   locations->SetInAt(0, Location::RequiresFpuRegister());
716   locations->SetInAt(1, Location::RequiresFpuRegister());
717   locations->SetInAt(2, Location::RequiresFpuRegister());
718   locations->SetOut(Location::SameAsFirstInput());
719 }
720 
721 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
722   CreateFPFPToFPCallLocations(allocator_, invoke);
723 }
724 
725 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
726   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
727 }
728 
729 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
730   CreateFPFPToFPCallLocations(allocator_, invoke);
731 }
732 
733 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
734   GenFPToFPCall(invoke, codegen_, kQuickPow);
735 }
736 
737 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
738   CreateFPFPToFPCallLocations(allocator_, invoke);
739 }
740 
741 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
742   GenFPToFPCall(invoke, codegen_, kQuickHypot);
743 }
744 
745 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
746   CreateFPFPToFPCallLocations(allocator_, invoke);
747 }
748 
749 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
750   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
751 }
752 
753 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
754   // We need at least two of the positions or length to be an integer constant,
755   // or else we won't have enough free registers.
756   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
757   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
758   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
759 
760   int num_constants =
761       ((src_pos != nullptr) ? 1 : 0)
762       + ((dest_pos != nullptr) ? 1 : 0)
763       + ((length != nullptr) ? 1 : 0);
764 
765   if (num_constants < 2) {
766     // Not enough free registers.
767     return;
768   }
769 
770   // As long as we are checking, we might as well check to see if the src and dest
771   // positions are >= 0.
772   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
773       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
774     // We will have to fail anyways.
775     return;
776   }
777 
778   // And since we are already checking, check the length too.
779   if (length != nullptr) {
780     int32_t len = length->GetValue();
781     if (len < 0) {
782       // Just call as normal.
783       return;
784     }
785   }
786 
787   // Okay, it is safe to generate inline code.
788   LocationSummary* locations =
789       new (invoke->GetBlock()->GetGraph()->GetAllocator())
790       LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
791   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
792   locations->SetInAt(0, Location::RequiresRegister());
793   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
794   locations->SetInAt(2, Location::RequiresRegister());
795   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
796   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
797 
798   // And we need some temporaries.  We will use REP MOVS{B,W,L}, so we need fixed registers.
799   locations->AddTemp(Location::RegisterLocation(ESI));
800   locations->AddTemp(Location::RegisterLocation(EDI));
801   locations->AddTemp(Location::RegisterLocation(ECX));
802 }
803 
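// Emits `cmpl lhs, rhs`, where `rhs` may be a register or an integer constant,
// followed by a branch to `label` when `lhs < rhs` (signed).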
804 template <typename LhsType>
805 static void EmitCmplJLess(X86Assembler* assembler,
806                           LhsType lhs,
807                           Location rhs,
808                           Label* label) {
809   static_assert(std::is_same_v<LhsType, Register> || std::is_same_v<LhsType, Address>);
810   if (rhs.IsConstant()) {
811     int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
812     __ cmpl(lhs, Immediate(rhs_constant));
813   } else {
814     __ cmpl(lhs, rhs.AsRegister<Register>());
815   }
816   __ j(kLess, label);
817 }
818 
819 static void CheckSystemArrayCopyPosition(X86Assembler* assembler,
820                                          Register array,
821                                          Location pos,
822                                          Location length,
823                                          SlowPathCode* slow_path,
824                                          Register temp,
825                                          bool length_is_array_length,
826                                          bool position_sign_checked) {
827   // Where is the length in the Array?
828   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
829 
830   if (pos.IsConstant()) {
831     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
832     if (pos_const == 0) {
833       if (!length_is_array_length) {
834         // Check that length(array) >= length.
835         EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
836       }
837     } else {
838       // Calculate length(array) - pos.
839       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
840       // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
841       __ movl(temp, Address(array, length_offset));
842       __ subl(temp, Immediate(pos_const));
843 
844       // Check that (length(array) - pos) >= length.
845       EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
846     }
847   } else if (length_is_array_length) {
848     // The only way the copy can succeed is if pos is zero.
849     Register pos_reg = pos.AsRegister<Register>();
850     __ testl(pos_reg, pos_reg);
851     __ j(kNotEqual, slow_path->GetEntryLabel());
852   } else {
853     // Check that pos >= 0.
854     Register pos_reg = pos.AsRegister<Register>();
855     if (!position_sign_checked) {
856       __ testl(pos_reg, pos_reg);
857       __ j(kLess, slow_path->GetEntryLabel());
858     }
859 
860     // Calculate length(array) - pos.
861     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
862     // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
863     __ movl(temp, Address(array, length_offset));
864     __ subl(temp, pos_reg);
865 
866     // Check that (length(array) - pos) >= length.
867     EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
868   }
869 }
870 
871 static void SystemArrayCopyPrimitive(HInvoke* invoke,
872                                      X86Assembler* assembler,
873                                      CodeGeneratorX86* codegen,
874                                      DataType::Type type) {
875   LocationSummary* locations = invoke->GetLocations();
876   Register src = locations->InAt(0).AsRegister<Register>();
877   Location src_pos = locations->InAt(1);
878   Register dest = locations->InAt(2).AsRegister<Register>();
879   Location dest_pos = locations->InAt(3);
880   Location length = locations->InAt(4);
881 
882   // Temporaries that we need for MOVSB/W/L.
883   Register src_base = locations->GetTemp(0).AsRegister<Register>();
884   DCHECK_EQ(src_base, ESI);
885   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
886   DCHECK_EQ(dest_base, EDI);
887   Register count = locations->GetTemp(2).AsRegister<Register>();
888   DCHECK_EQ(count, ECX);
889 
890   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
891   codegen->AddSlowPath(slow_path);
892 
893   // Bail out if the source and destination are the same (to handle overlap).
894   __ cmpl(src, dest);
895   __ j(kEqual, slow_path->GetEntryLabel());
896 
897   // Bail out if the source is null.
898   __ testl(src, src);
899   __ j(kEqual, slow_path->GetEntryLabel());
900 
901   // Bail out if the destination is null.
902   __ testl(dest, dest);
903   __ j(kEqual, slow_path->GetEntryLabel());
904 
905   // If the length is negative, bail out.
906   // We have already checked in the LocationsBuilder for the constant case.
907   if (!length.IsConstant()) {
908     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
909     __ j(kLess, slow_path->GetEntryLabel());
910   }
911 
912   // We need the count in ECX.
913   if (length.IsConstant()) {
914     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
915   } else {
916     __ movl(count, length.AsRegister<Register>());
917   }
918 
919   // Validity checks: source. Use src_base as a temporary register.
920   CheckSystemArrayCopyPosition(assembler,
921                                src,
922                                src_pos,
923                                Location::RegisterLocation(count),
924                                slow_path,
925                                src_base,
926                                /*length_is_array_length=*/ false,
927                                /*position_sign_checked=*/ false);
928 
929   // Validity checks: dest. Use src_base as a temporary register.
930   CheckSystemArrayCopyPosition(assembler,
931                                dest,
932                                dest_pos,
933                                Location::RegisterLocation(count),
934                                slow_path,
935                                src_base,
936                                /*length_is_array_length=*/ false,
937                                /*position_sign_checked=*/ false);
938 
939   // Okay, everything checks out.  Finally time to do the copy.
940   // The element size of the copied type determines the address scaling below.
941   const size_t data_size = DataType::Size(type);
942   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
943 
944   GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
945   GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
946 
947   // Do the move.
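  // REP MOVSB/W/L copies ECX elements from [ESI] to [EDI], advancing both pointers.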
948   switch (type) {
949     case DataType::Type::kInt8:
950        __ rep_movsb();
951        break;
952     case DataType::Type::kUint16:
953        __ rep_movsw();
954        break;
955     case DataType::Type::kInt32:
956        __ rep_movsl();
957        break;
958     default:
959        LOG(FATAL) << "Unexpected data type for intrinsic";
960   }
961   __ Bind(slow_path->GetExitLabel());
962 }
963 
964 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
965   CreateSystemArrayCopyLocations(invoke);
966 }
967 
968 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
969   X86Assembler* assembler = GetAssembler();
970   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
971 }
972 
973 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
974   X86Assembler* assembler = GetAssembler();
975   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
976 }
977 
978 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
979   CreateSystemArrayCopyLocations(invoke);
980 }
981 
982 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
983   X86Assembler* assembler = GetAssembler();
984   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
985 }
986 
987 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
988   CreateSystemArrayCopyLocations(invoke);
989 }
990 
991 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
992   // The inputs plus one temp.
993   LocationSummary* locations = new (allocator_) LocationSummary(
994       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
995   InvokeRuntimeCallingConvention calling_convention;
996   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
997   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
998   locations->SetOut(Location::RegisterLocation(EAX));
999 }
1000 
1001 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1002   X86Assembler* assembler = GetAssembler();
1003   LocationSummary* locations = invoke->GetLocations();
1004 
1005   // Note that the null check must have been done earlier.
1006   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1007 
1008   Register argument = locations->InAt(1).AsRegister<Register>();
1009   __ testl(argument, argument);
1010   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1011   codegen_->AddSlowPath(slow_path);
1012   __ j(kEqual, slow_path->GetEntryLabel());
1013 
1014   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1015   __ Bind(slow_path->GetExitLabel());
1016 }
1017 
1018 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1019   LocationSummary* locations =
1020       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1021   locations->SetInAt(0, Location::RequiresRegister());
1022   locations->SetInAt(1, Location::RequiresRegister());
1023 
1024   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1025   locations->AddTemp(Location::RegisterLocation(ECX));
1026   locations->AddTemp(Location::RegisterLocation(EDI));
1027 
1028   // Set output, ESI needed for repe_cmpsl instruction anyways.
1029   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1030 }
1031 
1032 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1033   X86Assembler* assembler = GetAssembler();
1034   LocationSummary* locations = invoke->GetLocations();
1035 
1036   Register str = locations->InAt(0).AsRegister<Register>();
1037   Register arg = locations->InAt(1).AsRegister<Register>();
1038   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1039   Register edi = locations->GetTemp(1).AsRegister<Register>();
1040   Register esi = locations->Out().AsRegister<Register>();
1041 
1042   NearLabel end, return_true, return_false;
1043 
1044   // Get offsets of count, value, and class fields within a string object.
1045   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1046   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1047   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1048 
1049   // Note that the null check must have been done earlier.
1050   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1051 
1052   StringEqualsOptimizations optimizations(invoke);
1053   if (!optimizations.GetArgumentNotNull()) {
1054     // Check if input is null, return false if it is.
1055     __ testl(arg, arg);
1056     __ j(kEqual, &return_false);
1057   }
1058 
1059   if (!optimizations.GetArgumentIsString()) {
1060     // Instanceof check for the argument by comparing class fields.
1061     // All string objects must have the same type since String cannot be subclassed.
1062     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1063     // If the argument is a string object, its class field must be equal to receiver's class field.
1064     //
1065     // As the String class is expected to be non-movable, we can read the class
1066     // field from String.equals' arguments without read barriers.
1067     AssertNonMovableStringClass();
1068     // Also, because we use the loaded class references only to compare them, we
1069     // don't need to unpoison them.
1070     // /* HeapReference<Class> */ ecx = str->klass_
1071     __ movl(ecx, Address(str, class_offset));
1072     // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1073     __ cmpl(ecx, Address(arg, class_offset));
1074     __ j(kNotEqual, &return_false);
1075   }
1076 
1077   // Reference equality check, return true if same reference.
1078   __ cmpl(str, arg);
1079   __ j(kEqual, &return_true);
1080 
1081   // Load length and compression flag of receiver string.
1082   __ movl(ecx, Address(str, count_offset));
1083   // Check if lengths and compression flags are equal, return false if they're not.
1084   // Two identical strings will always have same compression style since
1085   // compression style is decided on alloc.
1086   __ cmpl(ecx, Address(arg, count_offset));
1087   __ j(kNotEqual, &return_false);
1088   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1089   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1090                 "Expecting 0=compressed, 1=uncompressed");
1091   __ jecxz(&return_true);
1092 
1093   if (mirror::kUseStringCompression) {
1094     NearLabel string_uncompressed;
1095     // Extract length and differentiate between both compressed or both uncompressed.
1096     // Different compression style is cut above.
1097     __ shrl(ecx, Immediate(1));
1098     __ j(kCarrySet, &string_uncompressed);
1099     // Divide string length by 2, rounding up, and continue as if uncompressed.
1100     __ addl(ecx, Immediate(1));
1101     __ shrl(ecx, Immediate(1));
1102     __ Bind(&string_uncompressed);
1103   }
1104   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1105   __ leal(esi, Address(str, value_offset));
1106   __ leal(edi, Address(arg, value_offset));
1107 
1108   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1109   // divisible by 2.
1110   __ addl(ecx, Immediate(1));
1111   __ shrl(ecx, Immediate(1));
1112 
1113   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1114   // or 4 characters (compressed) at a time.
1115   DCHECK_ALIGNED(value_offset, 4);
1116   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1117 
1118   // Loop to compare strings two characters at a time starting at the beginning of the string.
1119   __ repe_cmpsl();
1120   // If strings are not equal, zero flag will be cleared.
1121   __ j(kNotEqual, &return_false);
1122 
1123   // Return true and exit the function.
1124   // If loop does not result in returning false, we return true.
1125   __ Bind(&return_true);
1126   __ movl(esi, Immediate(1));
1127   __ jmp(&end);
1128 
1129   // Return false and exit the function.
1130   __ Bind(&return_false);
1131   __ xorl(esi, esi);
1132   __ Bind(&end);
1133 }
1134 
1135 static void CreateStringIndexOfLocations(HInvoke* invoke,
1136                                          ArenaAllocator* allocator,
1137                                          bool start_at_zero) {
1138   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1139                                                                LocationSummary::kCallOnSlowPath,
1140                                                                kIntrinsified);
1141   // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1142   locations->SetInAt(0, Location::RegisterLocation(EDI));
1143   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1144   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1145   // of the instruction explicitly.
1146   // Note: This works as we don't clobber EAX anywhere.
1147   locations->SetInAt(1, Location::RegisterLocation(EAX));
1148   if (!start_at_zero) {
1149     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1150   }
1151   // As we clobber EDI during execution anyways, also use it as the output.
1152   locations->SetOut(Location::SameAsFirstInput());
1153 
1154   // repne scasw uses ECX as the counter.
1155   locations->AddTemp(Location::RegisterLocation(ECX));
1156   // Need another temporary to be able to compute the result.
1157   locations->AddTemp(Location::RequiresRegister());
1158   if (mirror::kUseStringCompression) {
1159     // Need another temporary to be able to save unflagged string length.
1160     locations->AddTemp(Location::RequiresRegister());
1161   }
1162 }
1163 
1164 static void GenerateStringIndexOf(HInvoke* invoke,
1165                                   X86Assembler* assembler,
1166                                   CodeGeneratorX86* codegen,
1167                                   bool start_at_zero) {
1168   LocationSummary* locations = invoke->GetLocations();
1169 
1170   // Note that the null check must have been done earlier.
1171   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1172 
1173   Register string_obj = locations->InAt(0).AsRegister<Register>();
1174   Register search_value = locations->InAt(1).AsRegister<Register>();
1175   Register counter = locations->GetTemp(0).AsRegister<Register>();
1176   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1177   Register out = locations->Out().AsRegister<Register>();
1178   // Only used when string compression feature is on.
1179   Register string_length_flagged;
1180 
1181   // Check our assumptions for registers.
1182   DCHECK_EQ(string_obj, EDI);
1183   DCHECK_EQ(search_value, EAX);
1184   DCHECK_EQ(counter, ECX);
1185   DCHECK_EQ(out, EDI);
1186 
1187   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1188   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1189   SlowPathCode* slow_path = nullptr;
1190   HInstruction* code_point = invoke->InputAt(1);
1191   if (code_point->IsIntConstant()) {
1192     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1193         std::numeric_limits<uint16_t>::max()) {
1194       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1195       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1196       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1197       codegen->AddSlowPath(slow_path);
1198       __ jmp(slow_path->GetEntryLabel());
1199       __ Bind(slow_path->GetExitLabel());
1200       return;
1201     }
1202   } else if (code_point->GetType() != DataType::Type::kUint16) {
1203     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1204     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1205     codegen->AddSlowPath(slow_path);
1206     __ j(kAbove, slow_path->GetEntryLabel());
1207   }
1208 
1209   // From here down, we know that we are looking for a char that fits in 16 bits.
1210   // Location of reference to data array within the String object.
1211   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1212   // Location of count within the String object.
1213   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1214 
1215   // Load the count field of the string containing the length and compression flag.
1216   __ movl(string_length, Address(string_obj, count_offset));
1217 
1218   // Do a zero-length check. Even with string compression `count == 0` means empty.
1219   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1220                 "Expecting 0=compressed, 1=uncompressed");
1221   // TODO: Support jecxz.
1222   NearLabel not_found_label;
1223   __ testl(string_length, string_length);
1224   __ j(kEqual, &not_found_label);
1225 
1226   if (mirror::kUseStringCompression) {
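    // The count field packs (length << 1) with the compression flag in bit 0 (0 = compressed
    // 8-bit chars, 1 = uncompressed UTF-16 chars), as asserted above.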
1227     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1228     __ movl(string_length_flagged, string_length);
1229     // Extract the length and shift out the least significant bit used as compression flag.
1230     __ shrl(string_length, Immediate(1));
1231   }
1232 
1233   if (start_at_zero) {
1234     // Number of chars to scan is the same as the string length.
1235     __ movl(counter, string_length);
1236 
1237     // Move to the start of the string.
1238     __ addl(string_obj, Immediate(value_offset));
1239   } else {
1240     Register start_index = locations->InAt(2).AsRegister<Register>();
1241 
1242     // Do a start_index check.
1243     __ cmpl(start_index, string_length);
1244     __ j(kGreaterEqual, &not_found_label);
1245 
1246     // Ensure the start index is >= 0 (clamp negative values to zero).
1247     __ xorl(counter, counter);
1248     __ cmpl(start_index, Immediate(0));
1249     __ cmovl(kGreater, counter, start_index);
1250 
1251     if (mirror::kUseStringCompression) {
1252       NearLabel modify_counter, offset_uncompressed_label;
1253       __ testl(string_length_flagged, Immediate(1));
1254       __ j(kNotZero, &offset_uncompressed_label);
1255       // Move to the start of the string: string_obj + value_offset + start_index.
1256       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1257       __ jmp(&modify_counter);
1258 
1259       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1260       __ Bind(&offset_uncompressed_label);
1261       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1262 
1263       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1264       // compare.
1265       __ Bind(&modify_counter);
1266     } else {
1267       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1268     }
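    // counter = string_length - start_index: negate the clamped start index, then add the
    // character count, giving the number of elements left to scan.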
1269     __ negl(counter);
1270     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1271   }
1272 
1273   if (mirror::kUseStringCompression) {
1274     NearLabel uncompressed_string_comparison;
1275     NearLabel comparison_done;
1276     __ testl(string_length_flagged, Immediate(1));
1277     __ j(kNotZero, &uncompressed_string_comparison);
1278 
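    // A compressed string holds only ASCII (<= 0x7F) characters, so a search value above 127
    // can never match and we can report "not found" immediately.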
1279     // Check if EAX (search_value) is ASCII.
1280     __ cmpl(search_value, Immediate(127));
1281     __ j(kGreater, &not_found_label);
1282     // Comparing byte-per-byte.
1283     __ repne_scasb();
1284     __ jmp(&comparison_done);
1285 
1286     // Everything is set up for repne scasw:
1287     //   * Comparison address in EDI.
1288     //   * Counter in ECX.
1289     __ Bind(&uncompressed_string_comparison);
1290     __ repne_scasw();
1291     __ Bind(&comparison_done);
1292   } else {
1293     __ repne_scasw();
1294   }
1295   // Did we find a match?
1296   __ j(kNotEqual, &not_found_label);
1297 
1298   // Yes, we matched.  Compute the index of the result.
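  // repne scasw/scasb decrements ECX once per element scanned, including the matching one,
  // so the match index is string_length - counter - 1 (this also covers a non-zero start
  // index, since counter started at string_length - start_index).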
1299   __ subl(string_length, counter);
1300   __ leal(out, Address(string_length, -1));
1301 
1302   NearLabel done;
1303   __ jmp(&done);
1304 
1305   // Failed to match; return -1.
1306   __ Bind(&not_found_label);
1307   __ movl(out, Immediate(-1));
1308 
1309   // And join up at the end.
1310   __ Bind(&done);
1311   if (slow_path != nullptr) {
1312     __ Bind(slow_path->GetExitLabel());
1313   }
1314 }
1315 
1316 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1317   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1318 }
1319 
1320 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1321   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1322 }
1323 
1324 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1325   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1326 }
1327 
1328 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1329   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1330 }
1331 
1332 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1333   LocationSummary* locations = new (allocator_) LocationSummary(
1334       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1335   InvokeRuntimeCallingConvention calling_convention;
1336   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1337   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1338   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1339   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1340   locations->SetOut(Location::RegisterLocation(EAX));
1341 }
1342 
1343 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1344   X86Assembler* assembler = GetAssembler();
1345   LocationSummary* locations = invoke->GetLocations();
1346 
1347   Register byte_array = locations->InAt(0).AsRegister<Register>();
1348   __ testl(byte_array, byte_array);
1349   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1350   codegen_->AddSlowPath(slow_path);
1351   __ j(kEqual, slow_path->GetEntryLabel());
1352 
1353   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1354   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1355   __ Bind(slow_path->GetExitLabel());
1356 }
1357 
1358 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1359   LocationSummary* locations =
1360       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1361   InvokeRuntimeCallingConvention calling_convention;
1362   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1363   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1364   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1365   locations->SetOut(Location::RegisterLocation(EAX));
1366 }
1367 
1368 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1369   // No need to emit code checking whether `locations->InAt(2)` is a null
1370   // pointer, as callers of the native method
1371   //
1372   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1373   //
1374   // all include a null check on `data` before calling that method.
1375   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1376   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1377 }
1378 
1379 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1380   LocationSummary* locations = new (allocator_) LocationSummary(
1381       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1382   InvokeRuntimeCallingConvention calling_convention;
1383   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1384   locations->SetOut(Location::RegisterLocation(EAX));
1385 }
1386 
1387 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1388   X86Assembler* assembler = GetAssembler();
1389   LocationSummary* locations = invoke->GetLocations();
1390 
1391   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1392   __ testl(string_to_copy, string_to_copy);
1393   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1394   codegen_->AddSlowPath(slow_path);
1395   __ j(kEqual, slow_path->GetEntryLabel());
1396 
1397   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1398   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1399   __ Bind(slow_path->GetExitLabel());
1400 }
1401 
1402 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1403   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1404   LocationSummary* locations =
1405       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1406   locations->SetInAt(0, Location::RequiresRegister());
1407   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1408   // Place srcEnd in ECX to save a move below.
1409   locations->SetInAt(2, Location::RegisterLocation(ECX));
1410   locations->SetInAt(3, Location::RequiresRegister());
1411   locations->SetInAt(4, Location::RequiresRegister());
1412 
1413   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1414   // We don't have enough registers to also grab ECX, so handle below.
1415   locations->AddTemp(Location::RegisterLocation(ESI));
1416   locations->AddTemp(Location::RegisterLocation(EDI));
1417 }
1418 
1419 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1420   X86Assembler* assembler = GetAssembler();
1421   LocationSummary* locations = invoke->GetLocations();
1422 
1423   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1424   // Location of data in char array buffer.
1425   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1426   // Location of char array data in string.
1427   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1428 
1429   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1430   Register obj = locations->InAt(0).AsRegister<Register>();
1431   Location srcBegin = locations->InAt(1);
1432   int srcBegin_value =
1433       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1434   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1435   Register dst = locations->InAt(3).AsRegister<Register>();
1436   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1437 
1438   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1439   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1440   DCHECK_EQ(char_size, 2u);
1441 
1442   // Compute the number of chars (words) to move.
1443   // Save ECX, since we don't know if it will be used later.
1444   __ pushl(ECX);
1445   int stack_adjust = kX86WordSize;
1446   __ cfi().AdjustCFAOffset(stack_adjust);
1447   DCHECK_EQ(srcEnd, ECX);
1448   if (srcBegin.IsConstant()) {
1449     __ subl(ECX, Immediate(srcBegin_value));
1450   } else {
1451     DCHECK(srcBegin.IsRegister());
1452     __ subl(ECX, srcBegin.AsRegister<Register>());
1453   }
1454 
1455   NearLabel done;
1456   if (mirror::kUseStringCompression) {
1457     // Location of the count field within the String object.
1458     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1459     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1460     DCHECK_EQ(c_char_size, 1u);
1461     __ pushl(EAX);
1462     __ cfi().AdjustCFAOffset(stack_adjust);
1463 
1464     NearLabel copy_loop, copy_uncompressed;
1465     __ testl(Address(obj, count_offset), Immediate(1));
1466     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1467                   "Expecting 0=compressed, 1=uncompressed");
1468     __ j(kNotZero, &copy_uncompressed);
1469     // Compute the address of the source string by adding the number of chars from
1470     // the source beginning to the value offset of a string.
1471     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1472 
1473     // Start the loop to copy String's value to Array of Char.
1474     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1475     __ Bind(&copy_loop);
1476     __ jecxz(&done);
1477     // Use EAX temporary (convert byte from ESI to word).
1478     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1479     __ movzxb(EAX, Address(ESI, 0));
1480     __ movw(Address(EDI, 0), EAX);
1481     __ leal(EDI, Address(EDI, char_size));
1482     __ leal(ESI, Address(ESI, c_char_size));
1483     // TODO: Add support for LOOP to X86Assembler.
1484     __ subl(ECX, Immediate(1));
1485     __ jmp(&copy_loop);
1486     __ Bind(&copy_uncompressed);
1487   }
1488 
1489   // Do the copy for uncompressed string.
1490   // Compute the address of the destination buffer.
1491   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1492   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
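  // REP MOVSW copies ECX 16-bit words from [ESI] to [EDI], advancing both pointers.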
1493   __ rep_movsw();
1494 
1495   __ Bind(&done);
1496   if (mirror::kUseStringCompression) {
1497     // Restore EAX.
1498     __ popl(EAX);
1499     __ cfi().AdjustCFAOffset(-stack_adjust);
1500   }
1501   // Restore ECX.
1502   __ popl(ECX);
1503   __ cfi().AdjustCFAOffset(-stack_adjust);
1504 }
1505 
1506 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
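  // The address argument is a Java long; in 32-bit mode only the low half of the register
  // pair is used as the pointer.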
1507   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1508   Location out_loc = locations->Out();
1509   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1510   // to avoid a SIGBUS.
1511   switch (size) {
1512     case DataType::Type::kInt8:
1513       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1514       break;
1515     case DataType::Type::kInt16:
1516       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1517       break;
1518     case DataType::Type::kInt32:
1519       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1520       break;
1521     case DataType::Type::kInt64:
1522       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1523       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1524       break;
1525     default:
1526       LOG(FATAL) << "Type not recognized for peek: " << size;
1527       UNREACHABLE();
1528   }
1529 }
1530 
1531 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1532   CreateLongToIntLocations(allocator_, invoke);
1533 }
1534 
1535 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1536   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1537 }
1538 
1539 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1540   CreateLongToIntLocations(allocator_, invoke);
1541 }
1542 
1543 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1544   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1545 }
1546 
1547 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1548   CreateLongToLongLocations(allocator_, invoke);
1549 }
1550 
1551 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1552   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1553 }
1554 
1555 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1556   CreateLongToIntLocations(allocator_, invoke);
1557 }
1558 
1559 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1560   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1561 }
1562 
1563 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1564                                          DataType::Type size,
1565                                          HInvoke* invoke) {
1566   LocationSummary* locations =
1567       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1568   locations->SetInAt(0, Location::RequiresRegister());
1569   HInstruction* value = invoke->InputAt(1);
1570   if (size == DataType::Type::kInt8) {
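    // A byte store needs a byte-addressable register (EAX/EBX/ECX/EDX); pin the value to EDX.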
1571     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1572   } else {
1573     locations->SetInAt(1, Location::RegisterOrConstant(value));
1574   }
1575 }
1576 
1577 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1578   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1579   Location value_loc = locations->InAt(1);
1580   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1581   // to avoid a SIGBUS.
1582   switch (size) {
1583     case DataType::Type::kInt8:
1584       if (value_loc.IsConstant()) {
1585         __ movb(Address(address, 0),
1586                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1587       } else {
1588         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1589       }
1590       break;
1591     case DataType::Type::kInt16:
1592       if (value_loc.IsConstant()) {
1593         __ movw(Address(address, 0),
1594                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1595       } else {
1596         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1597       }
1598       break;
1599     case DataType::Type::kInt32:
1600       if (value_loc.IsConstant()) {
1601         __ movl(Address(address, 0),
1602                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1603       } else {
1604         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1605       }
1606       break;
1607     case DataType::Type::kInt64:
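      // A 64-bit poke is emitted as two 32-bit stores: low word at offset 0, high word at
      // offset 4 (little-endian).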
1608       if (value_loc.IsConstant()) {
1609         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1610         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1611         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1612       } else {
1613         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1614         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1615       }
1616       break;
1617     default:
1618       LOG(FATAL) << "Type not recognized for poke: " << size;
1619       UNREACHABLE();
1620   }
1621 }
1622 
1623 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1624   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1625 }
1626 
1627 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1628   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1629 }
1630 
1631 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1632   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1633 }
1634 
1635 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1636   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1637 }
1638 
1639 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1640   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1641 }
1642 
1643 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1644   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1645 }
1646 
1647 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1648   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1649 }
1650 
1651 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1652   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1653 }
1654 
1655 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1656   LocationSummary* locations =
1657       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1658   locations->SetOut(Location::RequiresRegister());
1659 }
1660 
1661 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1662   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
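  // On x86, ART addresses the current Thread through the fs segment; load the Java peer
  // (the java.lang.Thread object) from its fixed offset.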
1663   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1664 }
1665 
1666 static void GenUnsafeGet(HInvoke* invoke,
1667                          DataType::Type type,
1668                          bool is_volatile,
1669                          CodeGeneratorX86* codegen) {
1670   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1671   LocationSummary* locations = invoke->GetLocations();
1672   Location base_loc = locations->InAt(1);
1673   Register base = base_loc.AsRegister<Register>();
1674   Location offset_loc = locations->InAt(2);
1675   Register offset = offset_loc.AsRegisterPairLow<Register>();
1676   Location output_loc = locations->Out();
1677 
1678   switch (type) {
1679     case DataType::Type::kInt8: {
1680       Register output = output_loc.AsRegister<Register>();
1681       __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1682       break;
1683     }
1684 
1685     case DataType::Type::kInt32: {
1686       Register output = output_loc.AsRegister<Register>();
1687       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1688       break;
1689     }
1690 
1691     case DataType::Type::kReference: {
1692       Register output = output_loc.AsRegister<Register>();
1693       if (codegen->EmitReadBarrier()) {
1694         if (kUseBakerReadBarrier) {
1695           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1696           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1697               invoke, output_loc, base, src, /* needs_null_check= */ false);
1698         } else {
1699           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1700           codegen->GenerateReadBarrierSlow(
1701               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1702         }
1703       } else {
1704         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1705         __ MaybeUnpoisonHeapReference(output);
1706       }
1707       break;
1708     }
1709 
1710     case DataType::Type::kInt64: {
1711         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1712         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1713         if (is_volatile) {
1714           // Need to use an XMM register to read the 64-bit value atomically.
1715           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1716           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
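          // Extract the two halves: movd copies the low 32 bits, then psrlq shifts the high
          // half down so the second movd can copy it.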
1717           __ movd(output_lo, temp);
1718           __ psrlq(temp, Immediate(32));
1719           __ movd(output_hi, temp);
1720         } else {
1721           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1722           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1723         }
1724       }
1725       break;
1726 
1727     default:
1728       LOG(FATAL) << "Unsupported op size " << type;
1729       UNREACHABLE();
1730   }
1731 }
1732 
1733 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1734                                           HInvoke* invoke,
1735                                           CodeGeneratorX86* codegen,
1736                                           DataType::Type type,
1737                                           bool is_volatile) {
1738   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1739   LocationSummary* locations =
1740       new (allocator) LocationSummary(invoke,
1741                                       can_call
1742                                           ? LocationSummary::kCallOnSlowPath
1743                                           : LocationSummary::kNoCall,
1744                                       kIntrinsified);
1745   if (can_call && kUseBakerReadBarrier) {
1746     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1747   }
1748   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1749   locations->SetInAt(1, Location::RequiresRegister());
1750   locations->SetInAt(2, Location::RequiresRegister());
1751   if (type == DataType::Type::kInt64) {
1752     if (is_volatile) {
1753       // Need an XMM temporary to read the volatile 64-bit value atomically.
1754       locations->AddTemp(Location::RequiresFpuRegister());
1755       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1756     } else {
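      // The pair is loaded with two 32-bit moves; writing the low half first must not clobber
      // the base/offset inputs, so the output may not share registers with them.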
1757       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1758     }
1759   } else {
1760     locations->SetOut(Location::RequiresRegister(),
1761                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1762   }
1763 }
1764 
1765 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1766   VisitJdkUnsafeGet(invoke);
1767 }
1768 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1769   VisitJdkUnsafeGetVolatile(invoke);
1770 }
1771 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1772   VisitJdkUnsafeGetLong(invoke);
1773 }
1774 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1775   VisitJdkUnsafeGetLongVolatile(invoke);
1776 }
1777 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1778   VisitJdkUnsafeGetReference(invoke);
1779 }
1780 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1781   VisitJdkUnsafeGetReferenceVolatile(invoke);
1782 }
1783 void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) {
1784   VisitJdkUnsafeGetByte(invoke);
1785 }
1786 
1787 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1788   VisitJdkUnsafeGet(invoke);
1789 }
1790 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1791   VisitJdkUnsafeGetVolatile(invoke);
1792 }
1793 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1794   VisitJdkUnsafeGetLong(invoke);
1795 }
1796 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1797   VisitJdkUnsafeGetLongVolatile(invoke);
1798 }
1799 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1800   VisitJdkUnsafeGetReference(invoke);
1801 }
1802 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1803   VisitJdkUnsafeGetReferenceVolatile(invoke);
1804 }
1805 void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) {
1806   VisitJdkUnsafeGetByte(invoke);
1807 }
1808 
1809 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1810   CreateIntIntIntToIntLocations(
1811       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false);
1812 }
1813 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1814   CreateIntIntIntToIntLocations(
1815       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1816 }
1817 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1818   CreateIntIntIntToIntLocations(
1819       allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1820 }
1821 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1822   CreateIntIntIntToIntLocations(
1823       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false);
1824 }
1825 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1826   CreateIntIntIntToIntLocations(
1827       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1828 }
1829 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1830   CreateIntIntIntToIntLocations(
1831       allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1832 }
1833 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1834   CreateIntIntIntToIntLocations(
1835       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false);
1836 }
1837 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1838   CreateIntIntIntToIntLocations(
1839       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1840 }
1841 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1842   CreateIntIntIntToIntLocations(
1843       allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1844 }
1845 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1846   CreateIntIntIntToIntLocations(
1847       allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false);
1848 }
1849 
1850 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1851   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1852 }
1853 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1854   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1855 }
1856 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1857   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1858 }
1859 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1860   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1861 }
1862 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1863   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1864 }
1865 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1866   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1867 }
1868 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1869   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1870 }
1871 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1872   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1873 }
1874 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1875   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1876 }
1877 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1878   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
1879 }
1880 
1881 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1882                                                        DataType::Type type,
1883                                                        HInvoke* invoke,
1884                                                        bool is_volatile) {
1885   LocationSummary* locations =
1886       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1887   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1888   locations->SetInAt(1, Location::RequiresRegister());
1889   locations->SetInAt(2, Location::RequiresRegister());
1890   locations->SetInAt(3, Location::RequiresRegister());
1891   if (type == DataType::Type::kReference) {
1892     // Need temp registers for card-marking.
1893     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1894     // Ensure the value is in a byte register.
1895     locations->AddTemp(Location::RegisterLocation(ECX));
1896   } else if (type == DataType::Type::kInt64 && is_volatile) {
1897     locations->AddTemp(Location::RequiresFpuRegister());
1898     locations->AddTemp(Location::RequiresFpuRegister());
1899   }
1900 }
1901 
1902 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1903   VisitJdkUnsafePut(invoke);
1904 }
1905 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1906   VisitJdkUnsafePutOrdered(invoke);
1907 }
1908 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1909   VisitJdkUnsafePutVolatile(invoke);
1910 }
1911 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1912   VisitJdkUnsafePutReference(invoke);
1913 }
1914 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1915   VisitJdkUnsafePutObjectOrdered(invoke);
1916 }
1917 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1918   VisitJdkUnsafePutReferenceVolatile(invoke);
1919 }
1920 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1921   VisitJdkUnsafePutLong(invoke);
1922 }
1923 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1924   VisitJdkUnsafePutLongOrdered(invoke);
1925 }
1926 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1927   VisitJdkUnsafePutLongVolatile(invoke);
1928 }
1929 void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) {
1930   VisitJdkUnsafePutByte(invoke);
1931 }
1932 
1933 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
1934   CreateIntIntIntIntToVoidPlusTempsLocations(
1935       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1936 }
1937 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1938   CreateIntIntIntIntToVoidPlusTempsLocations(
1939       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1940 }
1941 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1942   CreateIntIntIntIntToVoidPlusTempsLocations(
1943       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1944 }
1945 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1946   CreateIntIntIntIntToVoidPlusTempsLocations(
1947       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1948 }
1949 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
1950   CreateIntIntIntIntToVoidPlusTempsLocations(
1951       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1952 }
1953 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1954   CreateIntIntIntIntToVoidPlusTempsLocations(
1955       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1956 }
1957 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
1958   CreateIntIntIntIntToVoidPlusTempsLocations(
1959       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1960 }
1961 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
1962   CreateIntIntIntIntToVoidPlusTempsLocations(
1963       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1964 }
1965 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
1966   CreateIntIntIntIntToVoidPlusTempsLocations(
1967       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1968 }
1969 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1970   CreateIntIntIntIntToVoidPlusTempsLocations(
1971       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1972 }
1973 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1974   CreateIntIntIntIntToVoidPlusTempsLocations(
1975       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1976 }
1977 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1978   CreateIntIntIntIntToVoidPlusTempsLocations(
1979       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1980 }
1981 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
1982   CreateIntIntIntIntToVoidPlusTempsLocations(
1983       allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false);
1984 }
1985 
1986 // Ordered puts need no special handling here: they only require an AnyStore barrier, which the
1987 // x86 memory model already provides.
1988 static void GenUnsafePut(LocationSummary* locations,
1989                          DataType::Type type,
1990                          bool is_volatile,
1991                          CodeGeneratorX86* codegen) {
1992   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1993   Register base = locations->InAt(1).AsRegister<Register>();
1994   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1995   Location value_loc = locations->InAt(3);
1996 
1997   if (type == DataType::Type::kInt64) {
1998     Register value_lo = value_loc.AsRegisterPairLow<Register>();
1999     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2000     if (is_volatile) {
2001       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2002       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
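      // Assemble the 64-bit value in an XMM register (punpckldq keeps value_lo in the low
      // dword and puts value_hi in the high dword) so it can be stored with a single movsd.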
2003       __ movd(temp1, value_lo);
2004       __ movd(temp2, value_hi);
2005       __ punpckldq(temp1, temp2);
2006       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2007     } else {
2008       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2009       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2010     }
2011   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2012     Register temp = locations->GetTemp(0).AsRegister<Register>();
2013     __ movl(temp, value_loc.AsRegister<Register>());
2014     __ PoisonHeapReference(temp);
2015     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2016   } else {
2017     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2018   }
2019 
2020   if (is_volatile) {
2021     codegen->MemoryFence();
2022   }
2023 
2024   if (type == DataType::Type::kReference) {
2025     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2026     codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2027                              locations->GetTemp(1).AsRegister<Register>(),
2028                              base,
2029                              value_loc.AsRegister<Register>(),
2030                              value_can_be_null);
2031   }
2032 }
2033 
2034 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2035   VisitJdkUnsafePut(invoke);
2036 }
2037 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2038   VisitJdkUnsafePutOrdered(invoke);
2039 }
2040 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2041   VisitJdkUnsafePutVolatile(invoke);
2042 }
2043 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2044   VisitJdkUnsafePutReference(invoke);
2045 }
2046 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2047   VisitJdkUnsafePutObjectOrdered(invoke);
2048 }
2049 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2050   VisitJdkUnsafePutReferenceVolatile(invoke);
2051 }
2052 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2053   VisitJdkUnsafePutLong(invoke);
2054 }
2055 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2056   VisitJdkUnsafePutLongOrdered(invoke);
2057 }
2058 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2059   VisitJdkUnsafePutLongVolatile(invoke);
2060 }
2061 void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) {
2062   VisitJdkUnsafePutByte(invoke);
2063 }
2064 
2065 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2066   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2067 }
2068 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2069   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2070 }
2071 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2072   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2073 }
2074 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2075   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2076 }
2077 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2078   GenUnsafePut(
2079       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2080 }
2081 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2082   GenUnsafePut(
2083       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2084 }
2085 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2086   GenUnsafePut(
2087       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2088 }
2089 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2090   GenUnsafePut(
2091       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2092 }
2093 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2094   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2095 }
2096 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2097   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2098 }
2099 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2100   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2101 }
2102 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2103   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2104 }
2105 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2106   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
2107 }
2108 
2109 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2110                                        CodeGeneratorX86* codegen,
2111                                        DataType::Type type,
2112                                        HInvoke* invoke) {
2113   const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2114   LocationSummary* locations =
2115       new (allocator) LocationSummary(invoke,
2116                                       can_call
2117                                           ? LocationSummary::kCallOnSlowPath
2118                                           : LocationSummary::kNoCall,
2119                                       kIntrinsified);
2120   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2121   locations->SetInAt(1, Location::RequiresRegister());
2122   // Offset is a long, but in 32-bit mode we only need the low word.
2123   // Can we update the invoke here to remove a TypeConvert to Long?
2124   locations->SetInAt(2, Location::RequiresRegister());
2125   // Expected value must be in EAX or EDX:EAX.
2126   // For long, new value must be in ECX:EBX.
2127   if (type == DataType::Type::kInt64) {
2128     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2129     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2130   } else {
2131     locations->SetInAt(3, Location::RegisterLocation(EAX));
2132     locations->SetInAt(4, Location::RequiresRegister());
2133   }
2134 
2135   // Force a byte register for the output.
2136   locations->SetOut(Location::RegisterLocation(EAX));
2137   if (type == DataType::Type::kReference) {
2138     // Need temporary registers for card-marking, and possibly for
2139     // (Baker) read barrier.
2140     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2141     // Need a byte register for marking.
2142     locations->AddTemp(Location::RegisterLocation(ECX));
2143   }
2144 }
2145 
2146 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2147   VisitJdkUnsafeCASInt(invoke);
2148 }
2149 
2150 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2151   VisitJdkUnsafeCASLong(invoke);
2152 }
2153 
2154 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2155   VisitJdkUnsafeCASObject(invoke);
2156 }
2157 
2158 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2159   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2160   VisitJdkUnsafeCompareAndSetInt(invoke);
2161 }
2162 
2163 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2164   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2165   VisitJdkUnsafeCompareAndSetLong(invoke);
2166 }
2167 
2168 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2169   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2170   VisitJdkUnsafeCompareAndSetReference(invoke);
2171 }
2172 
2173 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2174   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke);
2175 }
2176 
2177 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2178   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke);
2179 }
2180 
2181 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2182   // The only supported read barrier implementation is the Baker-style read barrier.
2183   if (codegen_->EmitNonBakerReadBarrier()) {
2184     return;
2185   }
2186 
2187   CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke);
2188 }
2189 
2190 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2191                                       CodeGeneratorX86* codegen,
2192                                       Location expected_value,
2193                                       Location new_value,
2194                                       Register base,
2195                                       Register offset,
2196                                       // Only necessary for floating point
2197                                       Register temp = Register::kNoRegister) {
2198   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2199 
2200   if (DataType::Kind(type) == DataType::Type::kInt32) {
2201     DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2202   }
2203 
2204   // The address of the field within the holding object.
2205   Address field_addr(base, offset, TIMES_1, 0);
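  // LOCK CMPXCHG compares EAX (EDX:EAX for CMPXCHG8B) with the destination; on a match it
  // stores the new value and sets ZF, otherwise it loads the current value into EAX (EDX:EAX)
  // and clears ZF.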
2206 
2207   switch (type) {
2208     case DataType::Type::kBool:
2209     case DataType::Type::kInt8:
2210       __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2211       break;
2212     case DataType::Type::kInt16:
2213     case DataType::Type::kUint16:
2214       __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2215       break;
2216     case DataType::Type::kInt32:
2217       __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2218       break;
2219     case DataType::Type::kFloat32: {
2220       // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2221       DCHECK_NE(temp, EAX);
2222       // EAX is both an input and an output for cmpxchg.
2223       codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2224       codegen->Move32(Location::RegisterLocation(temp), new_value);
2225       __ LockCmpxchgl(field_addr, temp);
2226       break;
2227     }
2228     case DataType::Type::kInt64:
2229       // Ensure the expected value is in EAX:EDX and that the new
2230       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2231       DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2232       DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2233       DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2234       DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2235       __ LockCmpxchg8b(field_addr);
2236       break;
2237     default:
2238       LOG(FATAL) << "Unexpected CAS type " << type;
2239   }
2240   // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2241   // don't need scheduling barriers at this time.
2242 }
2243 
2244 static void GenPrimitiveCAS(DataType::Type type,
2245                             CodeGeneratorX86* codegen,
2246                             Location expected_value,
2247                             Location new_value,
2248                             Register base,
2249                             Register offset,
2250                             Location out,
2251                             // Only necessary for floating point
2252                             Register temp = Register::kNoRegister,
2253                             bool is_cmpxchg = false) {
2254   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2255 
2256   if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2257     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2258   }
2259 
2260   GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2261 
2262   if (is_cmpxchg) {
2263     // Sign-extend, zero-extend or move the result if necessary
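    // The old value is returned by CMPXCHG in AL/AX/EAX, so widen it according to the type's
    // signedness (zero-extend kBool/kUint16, sign-extend kInt8/kInt16) or, for kFloat32, move
    // it from EAX to the XMM output register.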
2264     switch (type) {
2265       case DataType::Type::kBool:
2266         __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2267         break;
2268       case DataType::Type::kInt8:
2269         __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2270         break;
2271       case DataType::Type::kInt16:
2272         __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2273         break;
2274       case DataType::Type::kUint16:
2275         __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2276         break;
2277       case DataType::Type::kFloat32:
2278         __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2279         break;
2280       default:
2281         // Nothing to do
2282         break;
2283     }
2284   } else {
2285     // Convert ZF into the Boolean result.
2286     __ setb(kZero, out.AsRegister<Register>());
2287     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2288   }
2289 }
2290 
2291 static void GenReferenceCAS(HInvoke* invoke,
2292                             CodeGeneratorX86* codegen,
2293                             Location expected_value,
2294                             Location new_value,
2295                             Register base,
2296                             Register offset,
2297                             Register temp,
2298                             Register temp2,
2299                             bool is_cmpxchg = false) {
2300   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2301   LocationSummary* locations = invoke->GetLocations();
2302   Location out = locations->Out();
2303 
2304   // The address of the field within the holding object.
2305   Address field_addr(base, offset, TIMES_1, 0);
2306 
2307   Register value = new_value.AsRegister<Register>();
2308   Register expected = expected_value.AsRegister<Register>();
2309   DCHECK_EQ(expected, EAX);
2310   DCHECK_NE(temp, temp2);
2311 
2312   if (codegen->EmitBakerReadBarrier()) {
2313     // Need to make sure the reference stored in the field is a to-space
2314     // one before attempting the CAS or the CAS could fail incorrectly.
2315     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2316         invoke,
2317         // Unused, used only as a "temporary" within the read barrier.
2318         Location::RegisterLocation(temp),
2319         base,
2320         field_addr,
2321         /* needs_null_check= */ false,
2322         /* always_update_field= */ true,
2323         &temp2);
2324   }
2325   bool base_equals_value = (base == value);
2326   if (kPoisonHeapReferences) {
2327     if (base_equals_value) {
2328       // If `base` and `value` are the same register location, move
2329       // `value` to a temporary register.  This way, poisoning
2330       // `value` won't invalidate `base`.
2331       value = temp;
2332       __ movl(value, base);
2333     }
2334 
2335     // Check that the register allocator did not assign the location
2336     // of `expected` (EAX) to `value` nor to `base`, so that heap
2337     // poisoning (when enabled) works as intended below.
2338     // - If `value` were equal to `expected`, both references would
2339     //   be poisoned twice, meaning they would not be poisoned at
2340     //   all, as heap poisoning uses address negation.
2341     // - If `base` were equal to `expected`, poisoning `expected`
2342     //   would invalidate `base`.
2343     DCHECK_NE(value, expected);
2344     DCHECK_NE(base, expected);
2345     __ PoisonHeapReference(expected);
2346     __ PoisonHeapReference(value);
2347   }
2348   __ LockCmpxchgl(field_addr, value);
2349 
2350   // LOCK CMPXCHG has full barrier semantics, and we don't need
2351   // scheduling barriers at this time.
2352 
2353   if (is_cmpxchg) {
2354     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2355     __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2356   } else {
2357     // Convert ZF into the Boolean result.
2358     __ setb(kZero, out.AsRegister<Register>());
2359     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2360   }
2361 
2362   // Mark card for object if the new value is stored.
2363   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2364   NearLabel skip_mark_gc_card;
2365   __ j(kNotZero, &skip_mark_gc_card);
2366   codegen->MaybeMarkGCCard(temp, temp2, base, value, value_can_be_null);
2367   __ Bind(&skip_mark_gc_card);
2368 
2369   // If heap poisoning is enabled, we need to unpoison the values
2370   // that were poisoned earlier.
2371   if (kPoisonHeapReferences) {
2372     if (base_equals_value) {
2373       // `value` has been moved to a temporary register, no need to
2374       // unpoison it.
2375     } else {
2376       // Ensure `value` is different from `out`, so that unpoisoning
2377       // the former does not invalidate the latter.
2378       DCHECK_NE(value, out.AsRegister<Register>());
2379       __ UnpoisonHeapReference(value);
2380     }
2381   }
2382   // Do not unpoison the reference contained in register
2383   // `expected`, as it is the same as register `out` (EAX).
2384 }
2385 
2386 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2387   LocationSummary* locations = invoke->GetLocations();
2388 
2389   Register base = locations->InAt(1).AsRegister<Register>();
2390   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2391   Location expected_value = locations->InAt(3);
2392   Location new_value = locations->InAt(4);
2393   Location out = locations->Out();
2394   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2395 
2396   if (type == DataType::Type::kReference) {
2397     // The only read barrier implementation supporting the
2398     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2399     DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2400 
2401     Register temp = locations->GetTemp(0).AsRegister<Register>();
2402     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2403     GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2404   } else {
2405     DCHECK(!DataType::IsFloatingPointType(type));
2406     GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2407   }
2408 }
2409 
2410 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2411   VisitJdkUnsafeCASInt(invoke);
2412 }
2413 
2414 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2415   VisitJdkUnsafeCASLong(invoke);
2416 }
2417 
2418 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2419   // The only read barrier implementation supporting the
2420   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2421   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2422 
2423   GenCAS(DataType::Type::kReference, invoke, codegen_);
2424 }
2425 
2426 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2427   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2428   VisitJdkUnsafeCompareAndSetInt(invoke);
2429 }
2430 
2431 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2432   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2433   VisitJdkUnsafeCompareAndSetLong(invoke);
2434 }
2435 
2436 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2437   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2438   VisitJdkUnsafeCompareAndSetReference(invoke);
2439 }
2440 
2441 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2442   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2443 }
2444 
2445 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2446   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2447 }
2448 
2449 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2450   // The only supported read barrier implementation is the Baker-style read barriers.
2451   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2452 
2453   GenCAS(DataType::Type::kReference, invoke, codegen_);
2454 }
2455 
2456 // Note: Unlike other architectures that use corresponding enums for the `VarHandle`
2457 // implementation, x86 is currently using it only for `Unsafe`.
2458 enum class GetAndUpdateOp {
2459   kSet,
2460   kAdd,
2461 };
2462 
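// Rough summary of the code generated for these get-and-update intrinsics (see
// GenUnsafeGetAndUpdate() below): 32-bit getAndAdd uses LOCK XADD, 32-bit getAndSet uses XCHG,
// reference getAndSet uses XCHG plus read barrier and card marking, and the 64-bit variants
// fall back to a LOCK CMPXCHG8B retry loop since x86-32 has no 64-bit XADD/XCHG.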
2463 void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2464                                        HInvoke* invoke,
2465                                        CodeGeneratorX86* codegen,
2466                                        DataType::Type type,
2467                                        GetAndUpdateOp get_and_unsafe_op) {
2468   const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2469   LocationSummary* locations =
2470       new (allocator) LocationSummary(invoke,
2471                                       can_call
2472                                           ? LocationSummary::kCallOnSlowPath
2473                                           : LocationSummary::kNoCall,
2474                                       kIntrinsified);
2475   if (can_call && kUseBakerReadBarrier) {
2476     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2477   }
2478   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2479   if (type == DataType::Type::kInt64) {
2480     // Explicitly allocate all registers.
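    // The 64-bit cases use nearly every register: the output needs EAX:EDX, CMPXCHG8B takes the
    // new value in EBX:ECX, and the base and the new value/addend occupy the rest. For kAdd the
    // addend needs another pair (ESI:EDI), so the offset has to live on the stack and the base
    // register (EBP) gets clobbered to form the field address.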
2481     locations->SetInAt(1, Location::RegisterLocation(EBP));
2482     if (get_and_unsafe_op == GetAndUpdateOp::kAdd) {
2483       locations->AddTemp(Location::RegisterLocation(EBP));  // We shall clobber EBP.
2484       locations->SetInAt(2, Location::Any());  // Offset shall be on the stack.
2485       locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI));
2486       locations->AddTemp(Location::RegisterLocation(EBX));
2487       locations->AddTemp(Location::RegisterLocation(ECX));
2488     } else {
2489       locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
2490       locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
2491     }
2492     locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
2493   } else {
2494     locations->SetInAt(1, Location::RequiresRegister());
2495     locations->SetInAt(2, Location::RequiresRegister());
2496     // Use the same register for both the output and the new value or addend
2497     // to take advantage of XCHG or XADD. Arbitrarily pick EAX.
2498     locations->SetInAt(3, Location::RegisterLocation(EAX));
2499     locations->SetOut(Location::RegisterLocation(EAX));
2500   }
2501 }
2502 
2503 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2504   VisitJdkUnsafeGetAndAddInt(invoke);
2505 }
2506 
2507 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2508   VisitJdkUnsafeGetAndAddLong(invoke);
2509 }
2510 
2511 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2512   VisitJdkUnsafeGetAndSetInt(invoke);
2513 }
2514 
2515 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2516   VisitJdkUnsafeGetAndSetLong(invoke);
2517 }
2518 
2519 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2520   VisitJdkUnsafeGetAndSetReference(invoke);
2521 }
2522 
2523 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2524   CreateUnsafeGetAndUpdateLocations(
2525       allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
2526 }
2527 
2528 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2529   CreateUnsafeGetAndUpdateLocations(
2530       allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
2531 }
2532 
2533 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2534   CreateUnsafeGetAndUpdateLocations(
2535       allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
2536 }
2537 
2538 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2539   CreateUnsafeGetAndUpdateLocations(
2540       allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
2541 }
2542 
2543 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2544   // The only supported read barrier implementation is the Baker-style read barriers.
2545   if (codegen_->EmitNonBakerReadBarrier()) {
2546     return;
2547   }
2548 
2549   CreateUnsafeGetAndUpdateLocations(
2550       allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
2551   LocationSummary* locations = invoke->GetLocations();
2552   locations->AddTemp(Location::RequiresRegister());
2553   locations->AddTemp(Location::RegisterLocation(ECX));  // Byte register for `MarkGCCard()`.
2554 }
2555 
2556 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2557                                   DataType::Type type,
2558                                   CodeGeneratorX86* codegen,
2559                                   GetAndUpdateOp get_and_update_op) {
2560   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2561   LocationSummary* locations = invoke->GetLocations();
2562 
2563   Location out = locations->Out();                            // Result.
2564   Register base = locations->InAt(1).AsRegister<Register>();  // Object pointer.
2565   Location offset = locations->InAt(2);                       // Long offset.
2566   Location arg = locations->InAt(3);                          // New value or addend.
2567 
2568   if (type == DataType::Type::kInt32) {
2569     DCHECK(out.Equals(arg));
2570     Register out_reg = out.AsRegister<Register>();
2571     Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2572     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2573       __ LockXaddl(field_address, out_reg);
2574     } else {
2575       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2576       __ xchgl(out_reg, field_address);
2577     }
2578   } else if (type == DataType::Type::kInt64) {
2579     // Prepare the field address. Ignore the high 32 bits of the `offset`.
2580     Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0);
2581     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2582       DCHECK(offset.IsDoubleStackSlot());
2583       __ addl(base, Address(ESP, offset.GetStackIndex()));  // Clobbers `base`.
2584       DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0)));
2585       field_address_low = Address(base, 0);
2586       field_address_high = Address(base, 4);
2587     } else {
2588       field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2589       field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4);
2590     }
2591     // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value.
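    // In effect (sketch of the loop below, not emitted literally):
    //   do {
    //     old = *field;                    // EDX:EAX
    //     new = is_add ? old + arg : arg;  // EBX:ECX
    //   } while (!cas64(field, old, new)); // LOCK CMPXCHG8B, retried until no concurrent update.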
2592     NearLabel loop;
2593     __ Bind(&loop);
2594     __ movl(EAX, field_address_low);
2595     __ movl(EDX, field_address_high);
2596     if (get_and_update_op == GetAndUpdateOp::kAdd) {
2597       DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg));
2598       __ movl(EBX, EAX);
2599       __ movl(ECX, EDX);
2600       __ addl(EBX, ESI);
2601       __ adcl(ECX, EDI);
2602     } else {
2603       DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2604       DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg));
2605     }
2606     __ LockCmpxchg8b(field_address_low);
2607     __ j(kNotEqual, &loop);  // Repeat on failure.
2608   } else {
2609     DCHECK_EQ(type, DataType::Type::kReference);
2610     DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2611     Register out_reg = out.AsRegister<Register>();
2612     Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2613     Register temp1 = locations->GetTemp(0).AsRegister<Register>();
2614     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2615 
2616     if (codegen->EmitReadBarrier()) {
2617       DCHECK(kUseBakerReadBarrier);
2618       // Ensure that the field contains a to-space reference.
2619       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2620           invoke,
2621           Location::RegisterLocation(temp2),
2622           base,
2623           field_address,
2624           /*needs_null_check=*/ false,
2625           /*always_update_field=*/ true,
2626           &temp1);
2627     }
2628 
2629     // Mark card for object as a new value shall be stored.
2630     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
2631     DCHECK_EQ(temp2, ECX);  // Byte register for `MarkGCCard()`.
2632     codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_reg, new_value_can_be_null);
2633 
2634     if (kPoisonHeapReferences) {
2635       // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2636       // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2637       __ movl(temp1, out_reg);
2638       __ PoisonHeapReference(temp1);
2639       __ xchgl(temp1, field_address);
2640       __ UnpoisonHeapReference(temp1);
2641       __ movl(out_reg, temp1);
2642     } else {
2643       __ xchgl(out_reg, field_address);
2644     }
2645   }
2646 }
2647 
2648 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2649   VisitJdkUnsafeGetAndAddInt(invoke);
2650 }
2651 
2652 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2653   VisitJdkUnsafeGetAndAddLong(invoke);
2654 }
2655 
2656 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2657   VisitJdkUnsafeGetAndSetInt(invoke);
2658 }
2659 
2660 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2661   VisitJdkUnsafeGetAndSetLong(invoke);
2662 }
2663 
2664 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2665   VisitJdkUnsafeGetAndSetReference(invoke);
2666 }
2667 
2668 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2669   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2670 }
2671 
2672 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2673   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2674 }
2675 
2676 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2677   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2678 }
2679 
2680 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2681   GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2682 }
2683 
2684 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2685   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2686 }
2687 
2688 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2689   LocationSummary* locations =
2690       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2691   locations->SetInAt(0, Location::RequiresRegister());
2692   locations->SetOut(Location::SameAsFirstInput());
2693   locations->AddTemp(Location::RequiresRegister());
2694 }
2695 
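// One round of the bit-reversal schedule used below; in effect it computes
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift)
// using `temp` as scratch.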
2696 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2697                      X86Assembler* assembler) {
2698   Immediate imm_shift(shift);
2699   Immediate imm_mask(mask);
2700   __ movl(temp, reg);
2701   __ shrl(reg, imm_shift);
2702   __ andl(temp, imm_mask);
2703   __ andl(reg, imm_mask);
2704   __ shll(temp, imm_shift);
2705   __ orl(reg, temp);
2706 }
2707 
2708 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2709   X86Assembler* assembler = GetAssembler();
2710   LocationSummary* locations = invoke->GetLocations();
2711 
2712   Register reg = locations->InAt(0).AsRegister<Register>();
2713   Register temp = locations->GetTemp(0).AsRegister<Register>();
2714 
2715   /*
2716    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2717    * swapping bits to reverse the bits of a number x. Using bswap saves instructions
2718    * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2719    * x = bswap x
2720    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2721    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2722    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2723    */
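  // For example, x = 0x80000000 becomes 0x00000080 after bswap and 0x00000001 after the three
  // SwapBits rounds, matching Integer.reverse(0x80000000).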
2724   __ bswapl(reg);
2725   SwapBits(reg, temp, 1, 0x55555555, assembler);
2726   SwapBits(reg, temp, 2, 0x33333333, assembler);
2727   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2728 }
2729 
2730 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2731   LocationSummary* locations =
2732       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2733   locations->SetInAt(0, Location::RequiresRegister());
2734   locations->SetOut(Location::SameAsFirstInput());
2735   locations->AddTemp(Location::RequiresRegister());
2736 }
2737 
2738 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2739   X86Assembler* assembler = GetAssembler();
2740   LocationSummary* locations = invoke->GetLocations();
2741 
2742   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2743   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2744   Register temp = locations->GetTemp(0).AsRegister<Register>();
2745 
2746   // We want to swap high/low, then bswap each one, and then do the same
2747   // as a 32 bit reverse.
2748   // Exchange high and low.
2749   __ movl(temp, reg_low);
2750   __ movl(reg_low, reg_high);
2751   __ movl(reg_high, temp);
2752 
2753   // bit-reverse low
2754   __ bswapl(reg_low);
2755   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2756   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2757   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2758 
2759   // bit-reverse high
2760   __ bswapl(reg_high);
2761   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2762   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2763   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2764 }
2765 
2766 static void CreateBitCountLocations(
2767     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2768   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2769     // Do nothing if there is no popcnt support. This results in generating
2770     // a call for the intrinsic rather than direct code.
2771     return;
2772   }
2773   LocationSummary* locations =
2774       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2775   if (is_long) {
2776     locations->AddTemp(Location::RequiresRegister());
2777   }
2778   locations->SetInAt(0, Location::Any());
2779   locations->SetOut(Location::RequiresRegister());
2780 }
2781 
2782 static void GenBitCount(X86Assembler* assembler,
2783                         CodeGeneratorX86* codegen,
2784                         HInvoke* invoke, bool is_long) {
2785   LocationSummary* locations = invoke->GetLocations();
2786   Location src = locations->InAt(0);
2787   Register out = locations->Out().AsRegister<Register>();
2788 
2789   if (invoke->InputAt(0)->IsConstant()) {
2790     // Evaluate this at compile time.
2791     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2792     int32_t result = is_long
2793         ? POPCOUNT(static_cast<uint64_t>(value))
2794         : POPCOUNT(static_cast<uint32_t>(value));
2795     codegen->Load32BitValue(out, result);
2796     return;
2797   }
2798 
2799   // Handle the non-constant cases.
2800   if (!is_long) {
2801     if (src.IsRegister()) {
2802       __ popcntl(out, src.AsRegister<Register>());
2803     } else {
2804       DCHECK(src.IsStackSlot());
2805       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2806     }
2807   } else {
2808     // The 64-bit case needs to worry about two parts.
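    // There is no 64-bit POPCNT on x86-32, so compute
    // popcount(x) = popcount(low 32 bits) + popcount(high 32 bits).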
2809     Register temp = locations->GetTemp(0).AsRegister<Register>();
2810     if (src.IsRegisterPair()) {
2811       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2812       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2813     } else {
2814       DCHECK(src.IsDoubleStackSlot());
2815       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2816       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2817     }
2818     __ addl(out, temp);
2819   }
2820 }
2821 
2822 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2823   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2824 }
2825 
2826 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2827   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2828 }
2829 
2830 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2831   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2832 }
2833 
2834 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2835   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2836 }
2837 
2838 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2839   LocationSummary* locations =
2840       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2841   if (is_long) {
2842     locations->SetInAt(0, Location::RequiresRegister());
2843   } else {
2844     locations->SetInAt(0, Location::Any());
2845   }
2846   locations->SetOut(Location::RequiresRegister());
2847 }
2848 
2849 static void GenLeadingZeros(X86Assembler* assembler,
2850                             CodeGeneratorX86* codegen,
2851                             HInvoke* invoke, bool is_long) {
2852   LocationSummary* locations = invoke->GetLocations();
2853   Location src = locations->InAt(0);
2854   Register out = locations->Out().AsRegister<Register>();
2855 
2856   if (invoke->InputAt(0)->IsConstant()) {
2857     // Evaluate this at compile time.
2858     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2859     if (value == 0) {
2860       value = is_long ? 64 : 32;
2861     } else {
2862       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2863     }
2864     codegen->Load32BitValue(out, value);
2865     return;
2866   }
2867 
2868   // Handle the non-constant cases.
2869   if (!is_long) {
2870     if (src.IsRegister()) {
2871       __ bsrl(out, src.AsRegister<Register>());
2872     } else {
2873       DCHECK(src.IsStackSlot());
2874       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2875     }
2876 
2877     // BSR sets ZF if the input was zero, and the output is undefined.
2878     NearLabel all_zeroes, done;
2879     __ j(kEqual, &all_zeroes);
2880 
2881     // Correct the result from BSR to get the final CLZ result.
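    // BSR yields the index of the highest set bit, i.e. 31 - CLZ(x); for values in [0, 31],
    // 31 - n is the same as n ^ 31, hence the XOR below.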
2882     __ xorl(out, Immediate(31));
2883     __ jmp(&done);
2884 
2885     // Fix the zero case with the expected result.
2886     __ Bind(&all_zeroes);
2887     __ movl(out, Immediate(32));
2888 
2889     __ Bind(&done);
2890     return;
2891   }
2892 
2893   // 64 bit case needs to worry about both parts of the register.
2894   DCHECK(src.IsRegisterPair());
2895   Register src_lo = src.AsRegisterPairLow<Register>();
2896   Register src_hi = src.AsRegisterPairHigh<Register>();
2897   NearLabel handle_low, done, all_zeroes;
2898 
2899   // Is the high word zero?
2900   __ testl(src_hi, src_hi);
2901   __ j(kEqual, &handle_low);
2902 
2903   // High word is not zero. We know that the BSR result is defined in this case.
2904   __ bsrl(out, src_hi);
2905 
2906   // Correct the result from BSR to get the final CLZ result.
2907   __ xorl(out, Immediate(31));
2908   __ jmp(&done);
2909 
2910   // High word was zero.  We have to compute the low word count and add 32.
2911   __ Bind(&handle_low);
2912   __ bsrl(out, src_lo);
2913   __ j(kEqual, &all_zeroes);
2914 
2915   // We had a valid result.  Use an XOR to both correct the result and add 32.
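  // (31 - BSR(src_lo)) + 32 == 63 - BSR(src_lo), which for values in [0, 31] equals
  // BSR(src_lo) ^ 63.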
2916   __ xorl(out, Immediate(63));
2917   __ jmp(&done);
2918 
2919   // All zero case.
2920   __ Bind(&all_zeroes);
2921   __ movl(out, Immediate(64));
2922 
2923   __ Bind(&done);
2924 }
2925 
2926 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2927   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2928 }
2929 
2930 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2931   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2932 }
2933 
2934 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2935   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2936 }
2937 
2938 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2939   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2940 }
2941 
2942 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2943   LocationSummary* locations =
2944       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2945   if (is_long) {
2946     locations->SetInAt(0, Location::RequiresRegister());
2947   } else {
2948     locations->SetInAt(0, Location::Any());
2949   }
2950   locations->SetOut(Location::RequiresRegister());
2951 }
2952 
2953 static void GenTrailingZeros(X86Assembler* assembler,
2954                              CodeGeneratorX86* codegen,
2955                              HInvoke* invoke, bool is_long) {
2956   LocationSummary* locations = invoke->GetLocations();
2957   Location src = locations->InAt(0);
2958   Register out = locations->Out().AsRegister<Register>();
2959 
2960   if (invoke->InputAt(0)->IsConstant()) {
2961     // Evaluate this at compile time.
2962     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2963     if (value == 0) {
2964       value = is_long ? 64 : 32;
2965     } else {
2966       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2967     }
2968     codegen->Load32BitValue(out, value);
2969     return;
2970   }
2971 
2972   // Handle the non-constant cases.
2973   if (!is_long) {
2974     if (src.IsRegister()) {
2975       __ bsfl(out, src.AsRegister<Register>());
2976     } else {
2977       DCHECK(src.IsStackSlot());
2978       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2979     }
2980 
2981     // BSF sets ZF if the input was zero, and the output is undefined.
2982     NearLabel done;
2983     __ j(kNotEqual, &done);
2984 
2985     // Fix the zero case with the expected result.
2986     __ movl(out, Immediate(32));
2987 
2988     __ Bind(&done);
2989     return;
2990   }
2991 
2992   // 64 bit case needs to worry about both parts of the register.
2993   DCHECK(src.IsRegisterPair());
2994   Register src_lo = src.AsRegisterPairLow<Register>();
2995   Register src_hi = src.AsRegisterPairHigh<Register>();
2996   NearLabel done, all_zeroes;
2997 
2998   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2999   __ bsfl(out, src_lo);
3000   __ j(kNotEqual, &done);
3001 
3002   // Low word was zero.  We have to compute the high word count and add 32.
3003   __ bsfl(out, src_hi);
3004   __ j(kEqual, &all_zeroes);
3005 
3006   // We had a valid result.  Add 32 to account for the low word being zero.
3007   __ addl(out, Immediate(32));
3008   __ jmp(&done);
3009 
3010   // All zero case.
3011   __ Bind(&all_zeroes);
3012   __ movl(out, Immediate(64));
3013 
3014   __ Bind(&done);
3015 }
3016 
3017 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3018   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
3019 }
3020 
3021 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3022   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3023 }
3024 
3025 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3026   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
3027 }
3028 
3029 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3030   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3031 }
3032 
3033 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
3034   return instruction->InputAt(input0) == instruction->InputAt(input1);
3035 }
3036 
3037 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
3038   // The only read barrier implementation supporting the
3039   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3040   if (codegen_->EmitNonBakerReadBarrier()) {
3041     return;
3042   }
3043 
3044   constexpr int32_t kLengthThreshold = -1;  // No cut-off - handle large arrays in intrinsic code.
3045   constexpr size_t kInitialNumTemps = 0u;  // We shall allocate temps explicitly.
3046   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3047       invoke, kLengthThreshold, kInitialNumTemps);
3048   if (locations != nullptr) {
3049     // Add temporaries.  We will use REP MOVSL, so we need fixed registers.
3050     DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
3051     locations->AddTemp(Location::RegisterLocation(ESI));
3052     locations->AddTemp(Location::RegisterLocation(EDI));
3053     locations->AddTemp(Location::RegisterLocation(ECX));  // Byte reg also used for write barrier.
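    // REP MOVSL implicitly takes its source pointer in ESI, its destination pointer in EDI and
    // the count of 32-bit words in ECX, hence the fixed temporaries above.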
3054 
3055     static constexpr size_t kSrc = 0;
3056     static constexpr size_t kSrcPos = 1;
3057     static constexpr size_t kDest = 2;
3058     static constexpr size_t kDestPos = 3;
3059     static constexpr size_t kLength = 4;
3060 
3061     if (!locations->InAt(kLength).IsConstant()) {
3062       // We may not have enough registers for all inputs and temps, so put the
3063       // non-const length explicitly to the same register as one of the temps.
3064       locations->SetInAt(kLength, Location::RegisterLocation(ECX));
3065     }
3066 
3067     if (codegen_->EmitBakerReadBarrier()) {
3068       // We need an additional temp in the slow path for holding the reference.
3069       if (locations->InAt(kSrcPos).IsConstant() ||
3070           locations->InAt(kDestPos).IsConstant() ||
3071           IsSameInput(invoke, kSrc, kDest) ||
3072           IsSameInput(invoke, kSrcPos, kDestPos)) {
3073         // We can allocate another temp register.
3074         locations->AddTemp(Location::RequiresRegister());
3075       } else {
3076         // Use the same fixed register for the non-const `src_pos` and the additional temp.
3077         // The `src_pos` is no longer needed when we reach the slow path.
3078         locations->SetInAt(kSrcPos, Location::RegisterLocation(EDX));
3079         locations->AddTemp(Location::RegisterLocation(EDX));
3080       }
3081     }
3082   }
3083 }
3084 
3085 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
3086   // The only read barrier implementation supporting the
3087   // SystemArrayCopy intrinsic is the Baker-style read barriers.
3088   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3089 
3090   X86Assembler* assembler = GetAssembler();
3091   LocationSummary* locations = invoke->GetLocations();
3092 
3093   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3094   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3095   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3096   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3097   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3098 
3099   Register src = locations->InAt(0).AsRegister<Register>();
3100   Location src_pos = locations->InAt(1);
3101   Register dest = locations->InAt(2).AsRegister<Register>();
3102   Location dest_pos = locations->InAt(3);
3103   Location length = locations->InAt(4);
3104   Location temp1_loc = locations->GetTemp(0);
3105   Register temp1 = temp1_loc.AsRegister<Register>();
3106   Location temp2_loc = locations->GetTemp(1);
3107   Register temp2 = temp2_loc.AsRegister<Register>();
3108 
3109   SlowPathCode* intrinsic_slow_path =
3110       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3111   codegen_->AddSlowPath(intrinsic_slow_path);
3112 
3113   NearLabel conditions_on_positions_validated;
3114   SystemArrayCopyOptimizations optimizations(invoke);
3115 
3116   // If source and destination are the same, we go to the slow path if the copy would have to
3117   // go backwards (`src_pos < dest_pos`). No check is needed if the positions are the same.
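  // For example, arraycopy(a, 1, a, 2, n) cannot be performed by a simple forward word-by-word
  // copy, as a[i] would be overwritten before it is read, so such cases take the slow path.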
3118   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3119     if (src_pos.IsConstant()) {
3120       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3121       if (dest_pos.IsConstant()) {
3122         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3123         if (optimizations.GetDestinationIsSource()) {
3124           // Checked when building locations.
3125           DCHECK_GE(src_pos_constant, dest_pos_constant);
3126         } else if (src_pos_constant < dest_pos_constant) {
3127           __ cmpl(src, dest);
3128           __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3129         }
3130       } else {
3131         if (!optimizations.GetDestinationIsSource()) {
3132           __ cmpl(src, dest);
3133           __ j(kNotEqual, &conditions_on_positions_validated);
3134         }
3135         __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3136         __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3137       }
3138     } else {
3139       if (!optimizations.GetDestinationIsSource()) {
3140         __ cmpl(src, dest);
3141         __ j(kNotEqual, &conditions_on_positions_validated);
3142       }
3143       Register src_pos_reg = src_pos.AsRegister<Register>();
3144       EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
3145     }
3146   }
3147 
3148   __ Bind(&conditions_on_positions_validated);
3149 
3150   if (!optimizations.GetSourceIsNotNull()) {
3151     // Bail out if the source is null.
3152     __ testl(src, src);
3153     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3154   }
3155 
3156   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3157     // Bail out if the destination is null.
3158     __ testl(dest, dest);
3159     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3160   }
3161 
3162   // If the length is negative, bail out.
3163   // We have already checked in the LocationsBuilder for the constant case.
3164   if (!length.IsConstant() &&
3165       !optimizations.GetCountIsSourceLength() &&
3166       !optimizations.GetCountIsDestinationLength()) {
3167     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3168     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3169   }
3170 
3171   // Validity checks: source.
3172   CheckSystemArrayCopyPosition(assembler,
3173                                src,
3174                                src_pos,
3175                                length,
3176                                intrinsic_slow_path,
3177                                temp1,
3178                                optimizations.GetCountIsSourceLength(),
3179                                /*position_sign_checked=*/ false);
3180 
3181   // Validity checks: dest.
3182   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3183   CheckSystemArrayCopyPosition(assembler,
3184                                dest,
3185                                dest_pos,
3186                                length,
3187                                intrinsic_slow_path,
3188                                temp1,
3189                                optimizations.GetCountIsDestinationLength(),
3190                                dest_position_sign_checked);
3191 
3192   auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3193     // No read barrier is needed for reading a chain of constant references for comparing
3194     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3195     // /* HeapReference<Class> */ temp = klass->component_type_
3196     __ movl(temp, Address(klass, component_offset));
3197     __ MaybeUnpoisonHeapReference(temp);
3198     // Check that the component type is not null.
3199     __ testl(temp, temp);
3200     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3201     // Check that the component type is not a primitive.
3202     __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
3203     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3204   };
3205 
3206   if (!optimizations.GetDoesNotNeedTypeCheck()) {
3207     // Check whether all elements of the source array are assignable to the component
3208     // type of the destination array. We do two checks: the classes are the same,
3209     // or the destination is Object[]. If none of these checks succeed, we go to the
3210     // slow path.
3211 
3212     if (codegen_->EmitBakerReadBarrier()) {
3213       // /* HeapReference<Class> */ temp1 = dest->klass_
3214       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3215           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3216       // Register `temp1` is not trashed by the read barrier emitted
3217       // by GenerateFieldLoadWithBakerReadBarrier below, as that
3218       // method produces a call to a ReadBarrierMarkRegX entry point,
3219       // which saves all potentially live registers, including
3220       // temporaries such as `temp1`.
3221       // /* HeapReference<Class> */ temp2 = src->klass_
3222       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3223           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3224     } else {
3225       // /* HeapReference<Class> */ temp1 = dest->klass_
3226       __ movl(temp1, Address(dest, class_offset));
3227       __ MaybeUnpoisonHeapReference(temp1);
3228       // /* HeapReference<Class> */ temp2 = src->klass_
3229       __ movl(temp2, Address(src, class_offset));
3230       __ MaybeUnpoisonHeapReference(temp2);
3231     }
3232 
3233     __ cmpl(temp1, temp2);
3234     if (optimizations.GetDestinationIsTypedObjectArray()) {
3235       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3236       NearLabel do_copy;
3237       // For class match, we can skip the source type check regardless of the optimization flag.
3238       __ j(kEqual, &do_copy);
3239       // No read barrier is needed for reading a chain of constant references
3240       // for comparing with null, see `ReadBarrierOption`.
3241       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3242       __ movl(temp1, Address(temp1, component_offset));
3243       __ MaybeUnpoisonHeapReference(temp1);
3244       // No need to unpoison the following heap reference load, as
3245       // we're comparing against null.
3246       __ cmpl(Address(temp1, super_offset), Immediate(0));
3247       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3248       // Bail out if the source is not a non primitive array.
3249       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3250         check_non_primitive_array_class(temp2, temp2);
3251       }
3252       __ Bind(&do_copy);
3253     } else {
3254       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3255       // For class match, we can skip the array type check completely if at least one of source
3256       // and destination is known to be a non primitive array, otherwise one check is enough.
3257       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3258       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3259           !optimizations.GetSourceIsNonPrimitiveArray()) {
3260         check_non_primitive_array_class(temp2, temp2);
3261       }
3262     }
3263   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3264     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3265     // Bail out if the source is not a non primitive array.
3266     // No read barrier is needed for reading a chain of constant references for comparing
3267     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3268     // /* HeapReference<Class> */ temp1 = src->klass_
3269     __ movl(temp1, Address(src, class_offset));
3270     __ MaybeUnpoisonHeapReference(temp1);
3271     check_non_primitive_array_class(temp1, temp1);
3272   }
3273 
3274   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3275     // Zero constant length: no need to emit the loop code at all.
3276   } else {
3277     const DataType::Type type = DataType::Type::kReference;
3278     const size_t data_size = DataType::Size(type);
3279     const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
3280 
3281     // Don't enter copy loop if `length == 0`.
3282     NearLabel skip_copy_and_write_barrier;
3283     if (!length.IsConstant()) {
3284       __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3285       __ j(kEqual, &skip_copy_and_write_barrier);
3286     }
3287 
3288     // Compute the base source address in `temp1`.
3289     GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
3290     // Compute the base destination address in `temp2`.
3291     GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
3292 
3293     SlowPathCode* read_barrier_slow_path = nullptr;
3294     if (codegen_->EmitBakerReadBarrier()) {
3295       // SystemArrayCopy implementation for Baker read barriers (see
3296       // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3297       //
3298       //   if (src_ptr != end_ptr) {
3299       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3300       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3301       //     bool is_gray = (rb_state == ReadBarrier::GrayState());
3302       //     if (is_gray) {
3303       //       // Slow-path copy.
3304       //       for (size_t i = 0; i != length; ++i) {
3305       //         dest_array[dest_pos + i] =
3306       //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3307       //       }
3308       //     } else {
3309       //       // Fast-path copy.
3310       //       do {
3311       //         *dest_ptr++ = *src_ptr++;
3312       //       } while (src_ptr != end_ptr)
3313       //     }
3314       //   }
3315 
3316       // Given the numeric representation, it's enough to check the low bit of the rb_state.
3317       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3318       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3319       constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3320       constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3321       constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3322 
3323       // if (rb_state == ReadBarrier::GrayState())
3324       //   goto slow_path;
3325       // At this point, just do the "if" and make sure that flags are preserved until the branch.
3326       __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3327 
3328       // Load fence to prevent load-load reordering.
3329       // Note that this is a no-op, thanks to the x86 memory model.
3330       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3331 
3332       // Slow path used to copy array when `src` is gray.
3333       read_barrier_slow_path =
3334           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3335       codegen_->AddSlowPath(read_barrier_slow_path);
3336 
3337       // We have done the "if" of the gray bit check above, now branch based on the flags.
3338       __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3339     }
3340 
3341     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
3342     if (length.IsConstant()) {
3343       __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
3344     } else {
3345       DCHECK_EQ(temp3, length.AsRegister<Register>());
3346     }
3347 
3348     // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
3349     DCHECK_EQ(temp1, ESI);
3350     DCHECK_EQ(temp2, EDI);
3351     DCHECK_EQ(temp3, ECX);
3352     __ rep_movsl();
3353 
3354     if (read_barrier_slow_path != nullptr) {
3355       DCHECK(codegen_->EmitBakerReadBarrier());
3356       __ Bind(read_barrier_slow_path->GetExitLabel());
3357     }
3358 
3359     // We only need one card marking on the destination array.
3360     codegen_->MarkGCCard(temp1, temp3, dest);
3361 
3362     __ Bind(&skip_copy_and_write_barrier);
3363   }
3364 
3365   __ Bind(intrinsic_slow_path->GetExitLabel());
3366 }
3367 
3368 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3369   LocationSummary* locations = invoke->GetLocations();
3370   if (locations != nullptr) {
3371     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3372     // Note: The base method address is not present yet when this is called from the
3373     // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3374     if (invoke_static_or_direct->HasSpecialInput()) {
3375       DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3376                  ->IsX86ComputeBaseMethodAddress());
3377       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3378                          Location::RequiresRegister());
3379     }
3380   }
3381 }
3382 
3383 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3384   void IntrinsicLocationsBuilderX86::Visit##name##ValueOf(HInvoke* invoke) {             \
3385     InvokeRuntimeCallingConvention calling_convention;                                   \
3386     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3387         invoke,                                                                          \
3388         codegen_,                                                                        \
3389         low,                                                                             \
3390         (high) - (low) + 1,                                                              \
3391         Location::RegisterLocation(EAX),                                                 \
3392         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
3393     RequestBaseMethodAddressInRegister(invoke);                                          \
3394   }                                                                                      \
3395   void IntrinsicCodeGeneratorX86::Visit##name##ValueOf(HInvoke* invoke) {                \
3396     IntrinsicVisitor::ValueOfInfo info =                                                 \
3397         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3398                                              codegen_->GetCompilerOptions(),             \
3399                                              WellKnownClasses::java_lang_##name##_value, \
3400                                              low,                                        \
3401                                              (high) - (low) + 1,                         \
3402                                              start_index);                               \
3403     HandleValueOf(invoke, info, type);                                                   \
3404   }
3405   BOXED_TYPES(VISIT_INTRINSIC)
3406 #undef VISIT_INTRINSIC
3407 
3408 void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke,
3409                                               const IntrinsicVisitor::ValueOfInfo& info,
3410                                               DataType::Type type) {
3411   DCHECK(invoke->IsInvokeStaticOrDirect());
3412   LocationSummary* locations = invoke->GetLocations();
3413   X86Assembler* assembler = GetAssembler();
3414 
3415   Register out = locations->Out().AsRegister<Register>();
3416   auto allocate_instance = [&]() {
3417     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3418     codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3419     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3420     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3421   };
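  // The intrinsic mirrors the Java-level caching in the boxed type's valueOf(); roughly:
  //   return (value - low) < length ? CACHE[value - low] : new Boxed(value);
  // (illustrative sketch; the cached instances live in the boot image).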
3422   if (invoke->InputAt(0)->IsIntConstant()) {
3423     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3424     if (static_cast<uint32_t>(value - info.low) < info.length) {
3425       // Just embed the object in the code.
3426       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3427       codegen_->LoadBootImageAddress(
3428           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3429     } else {
3430       DCHECK(locations->CanCall());
3431       // Allocate and initialize a new boxed instance (e.g. j.l.Integer).
3432       // TODO: If we JIT, we could allocate the object now, and store it in the
3433       // JIT object table.
3434       allocate_instance();
3435       codegen_->MoveToMemory(type,
3436                              Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()),
3437                              out,
3438                              /* dst_index= */ Register::kNoRegister,
3439                              /* dst_scale= */ TIMES_1,
3440                              /* dst_disp= */ info.value_offset);
3441     }
3442   } else {
3443     DCHECK(locations->CanCall());
3444     Register in = locations->InAt(0).AsRegister<Register>();
3445     // Check bounds of our cache.
3446     __ leal(out, Address(in, -info.low));
3447     __ cmpl(out, Immediate(info.length));
3448     NearLabel allocate, done;
3449     __ j(kAboveEqual, &allocate);
3450     // If the value is within the bounds, load the object directly from the array.
3451     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3452     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3453                   "Check heap reference size.");
3454     if (codegen_->GetCompilerOptions().IsBootImage()) {
3455       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3456       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3457       HX86ComputeBaseMethodAddress* method_address =
3458           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3459       DCHECK(method_address != nullptr);
3460       Register method_address_reg =
3461           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3462       __ movl(out,
3463               Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3464       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3465     } else {
3466       // Note: We're about to clobber the index in `out`, so we need to use `in` and
3467       // adjust the offset accordingly.
3468       uint32_t mid_array_boot_image_offset =
3469               info.array_data_boot_image_reference - info.low * kElementSize;
3470       codegen_->LoadBootImageAddress(
3471           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3472       DCHECK_NE(out, in);
3473       __ movl(out, Address(out, in, TIMES_4, 0));
3474     }
3475     __ MaybeUnpoisonHeapReference(out);
3476     __ jmp(&done);
3477     __ Bind(&allocate);
3478     // Otherwise allocate and initialize a new object.
3479     allocate_instance();
3480     codegen_->MoveToMemory(type,
3481                            Location::RegisterLocation(in),
3482                            out,
3483                            /* dst_index= */ Register::kNoRegister,
3484                            /* dst_scale= */ TIMES_1,
3485                            /* dst_disp= */ info.value_offset);
3486     __ Bind(&done);
3487   }
3488 }
3489 
3490 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3491   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3492   RequestBaseMethodAddressInRegister(invoke);
3493 }
3494 
3495 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3496   X86Assembler* assembler = GetAssembler();
3497   LocationSummary* locations = invoke->GetLocations();
3498 
3499   Location obj = locations->InAt(0);
3500   Location out = locations->Out();
3501 
3502   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3503   codegen_->AddSlowPath(slow_path);
3504 
3505   if (codegen_->EmitReadBarrier()) {
3506     // Check self->GetWeakRefAccessEnabled().
3507     ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3508     __ fs()->cmpl(Address::Absolute(offset),
3509                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3510     __ j(kNotEqual, slow_path->GetEntryLabel());
3511   }
3512 
3513   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3514   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3515                                         invoke->AsInvokeStaticOrDirect());
3516 
3517   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3518   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3519   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3520   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3521             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3522   __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3523           Immediate(0));
3524   __ j(kNotEqual, slow_path->GetEntryLabel());
3525 
3526   // Load the value from the field.
3527   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3528   if (codegen_->EmitBakerReadBarrier()) {
3529     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3530                                                     out,
3531                                                     obj.AsRegister<Register>(),
3532                                                     referent_offset,
3533                                                     /*needs_null_check=*/ true);
3534     // Note that the fence is a no-op, thanks to the x86 memory model.
3535     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3536   } else {
3537     __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3538     codegen_->MaybeRecordImplicitNullCheck(invoke);
3539     // Note that the fence is a no-op, thanks to the x86 memory model.
3540     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3541     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3542   }
3543   __ Bind(slow_path->GetExitLabel());
3544 }
3545 
3546 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3547   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3548 }
3549 
3550 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3551   X86Assembler* assembler = GetAssembler();
3552   LocationSummary* locations = invoke->GetLocations();
3553 
3554   Register obj = locations->InAt(0).AsRegister<Register>();
3555   Register other = locations->InAt(1).AsRegister<Register>();
3556   Register out = locations->Out().AsRegister<Register>();
3557 
3558   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3559   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3560 
3561   __ movl(out, Address(obj, referent_offset));
3562   codegen_->MaybeRecordImplicitNullCheck(invoke);
3563   __ MaybeUnpoisonHeapReference(out);
3564   // Note that the fence is a no-op, thanks to the x86 memory model.
3565   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3566 
3567   NearLabel end, return_true, return_false;
3568   __ cmpl(out, other);
3569 
3570   if (codegen_->EmitReadBarrier()) {
3571     DCHECK(kUseBakerReadBarrier);
3572 
3573     __ j(kEqual, &return_true);
3574 
3575     // Check if the loaded reference is null.
3576     __ testl(out, out);
3577     __ j(kZero, &return_false);
3578 
3579     // For correct memory visibility, we need a barrier before loading the lock word,
3580     // but the barrier emitted above for the volatile load is already sufficient.
3581 
3582     // Load the lockword and check if it is a forwarding address.
3583     static_assert(LockWord::kStateShift == 30u);
3584     static_assert(LockWord::kStateForwardingAddress == 3u);
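    // A lock word that is unsigned-greater-or-equal to 0xc0000000 has both state bits set
    // (kStateForwardingAddress << kStateShift), i.e. it holds a forwarding address.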
3585     __ movl(out, Address(out, monitor_offset));
3586     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3587     __ j(kBelow, &return_false);
3588 
3589     // Extract the forwarding address and compare with `other`.
3590     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3591     __ cmpl(out, other);
3592   }
3593 
3594   __ j(kNotEqual, &return_false);
3595 
3596   // Return true and exit the function.
3597   __ Bind(&return_true);
3598   __ movl(out, Immediate(1));
3599   __ jmp(&end);
3600 
3601   // Return false and exit the function.
3602   __ Bind(&return_false);
3603   __ xorl(out, out);
3604   __ Bind(&end);
3605 }
3606 
3607 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3608   LocationSummary* locations =
3609       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3610   locations->SetOut(Location::RequiresRegister());
3611 }
3612 
3613 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3614   X86Assembler* assembler = GetAssembler();
3615   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3616   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3617   NearLabel done;
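  // Thread.interrupted() semantics: return the interrupted flag and, if it was set, clear it.
  // The clearing store is followed by a fence so it is not reordered with later accesses.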
3618   __ fs()->movl(out, address);
3619   __ testl(out, out);
3620   __ j(kEqual, &done);
3621   __ fs()->movl(address, Immediate(0));
3622   codegen_->MemoryFence();
3623   __ Bind(&done);
3624 }
3625 
3626 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3627   LocationSummary* locations =
3628       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3629   locations->SetInAt(0, Location::Any());
3630 }
3631 
3632 void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3633 
3634 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3635   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3636                                                                 LocationSummary::kCallOnSlowPath,
3637                                                                 kIntrinsified);
3638   locations->SetInAt(0, Location::RegisterLocation(EAX));
3639   locations->SetInAt(1, Location::RequiresRegister());
3640   locations->SetOut(Location::SameAsFirstInput());
3641   // Intel uses edx:eax as the dividend.
3642   locations->AddTemp(Location::RegisterLocation(EDX));
3643 }
3644 
3645 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3646   X86Assembler* assembler = GetAssembler();
3647   LocationSummary* locations = invoke->GetLocations();
3648   Location out = locations->Out();
3649   Location first = locations->InAt(0);
3650   Location second = locations->InAt(1);
3651   Register edx = locations->GetTemp(0).AsRegister<Register>();
3652   Register second_reg = second.AsRegister<Register>();
3653 
3654   DCHECK_EQ(EAX, first.AsRegister<Register>());
3655   DCHECK_EQ(EAX, out.AsRegister<Register>());
3656   DCHECK_EQ(EDX, edx);
3657 
3658   // If the divisor is zero, bail out to the managed implementation to handle it.
3659   __ testl(second_reg, second_reg);
3660   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3661   codegen_->AddSlowPath(slow_path);
3662   __ j(kEqual, slow_path->GetEntryLabel());
3663 
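  // Unsigned 32-bit division: zero the high half of the dividend (EDX) so that EDX:EAX == EAX,
  // then `div` leaves the quotient in EAX (the declared output) and the remainder in EDX.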
3664   __ xorl(edx, edx);
3665   __ divl(second_reg);
3666 
3667   __ Bind(slow_path->GetExitLabel());
3668 }
3669 
3670 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3671   VarHandleOptimizations optimizations(invoke);
3672   if (optimizations.GetDoNotIntrinsify()) {
3673     return false;
3674   }
3675 
3676   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3677   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
3678   if (expected_coordinates_count > 1u) {
3679     // Only static and instance fields VarHandle are supported now.
3680     // TODO: add support for arrays and views.
3681     return false;
3682   }
3683 
3684   return true;
3685 }
3686 
3687 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3688                                              mirror::VarHandle::AccessMode access_mode,
3689                                              SlowPathCode* slow_path,
3690                                              X86Assembler* assembler) {
3691   const uint32_t access_modes_bitmask_offset =
3692       mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3693   const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3694 
3695   // If the access mode is not supported, bail out to the runtime implementation.
3696   __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3697   __ j(kZero, slow_path->GetEntryLabel());
3698 }
3699 
3700 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3701                                               SlowPathCode* slow_path,
3702                                               X86Assembler* assembler) {
3703   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3704 
3705   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3706   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3707   __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3708   __ j(kNotEqual, slow_path->GetEntryLabel());
3709 }
3710 
3711 static void GenerateSubTypeObjectCheck(Register object,
3712                                        Register temp,
3713                                        Address type_address,
3714                                        SlowPathCode* slow_path,
3715                                        X86Assembler* assembler,
3716                                        bool object_can_be_null = true) {
3717   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3718   const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3719   NearLabel check_type_compatibility, type_matched;
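  // Illustrative sketch: walk object->klass_ and its super classes, exiting when one of them
  // equals the expected type; take the slow path once the chain ends (super class == null).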
3720 
3721   // If the object is null, there is no need to check the type
3722   if (object_can_be_null) {
3723     __ testl(object, object);
3724     __ j(kZero, &type_matched);
3725   }
3726 
3727   // Do not unpoison for in-memory comparison.
3728   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3729   __ movl(temp, Address(object, class_offset));
3730   __ Bind(&check_type_compatibility);
3731   __ cmpl(temp, type_address);
3732   __ j(kEqual, &type_matched);
3733   // Load the super class.
3734   __ MaybeUnpoisonHeapReference(temp);
3735   __ movl(temp, Address(temp, super_class_offset));
3736   // If the super class is null, we reached the root of the hierarchy without a match.
3737   // We let the slow path handle uncovered cases (e.g. interfaces).
3738   __ testl(temp, temp);
3739   __ j(kEqual, slow_path->GetEntryLabel());
3740   __ jmp(&check_type_compatibility);
3741   __ Bind(&type_matched);
3742 }
3743 
3744 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3745                                                  Register temp,
3746                                                  SlowPathCode* slow_path,
3747                                                  X86Assembler* assembler) {
3748   VarHandleOptimizations optimizations(invoke);
3749   LocationSummary* locations = invoke->GetLocations();
3750   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3751   Register object = locations->InAt(1).AsRegister<Register>();
3752 
3753   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3754   const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3755 
3756   // Check that the VarHandle references an instance field by checking that
3757   // coordinateType1 == null. coordinateType0 should not be null, but that is covered by the
3758   // type compatibility check against the source object's type, which will fail for null.
3759   __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3760   __ j(kNotEqual, slow_path->GetEntryLabel());
3761 
3762   // Check if the object is null
3763   if (!optimizations.GetSkipObjectNullCheck()) {
3764     __ testl(object, object);
3765     __ j(kZero, slow_path->GetEntryLabel());
3766   }
3767 
3768   // Check the object's class against coordinateType0.
3769   GenerateSubTypeObjectCheck(object,
3770                              temp,
3771                              Address(varhandle_object, coordtype0_offset),
3772                              slow_path,
3773                              assembler,
3774                              /* object_can_be_null= */ false);
3775 }
3776 
3777 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3778                                               Register temp,
3779                                               DataType::Type type,
3780                                               SlowPathCode* slow_path,
3781                                               X86Assembler* assembler) {
3782   const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3783   const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3784   const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3785 
3786   // No read barrier is needed here: the reference is loaded only to read a constant field
3787   // through it.
3788   __ movl(temp, Address(varhandle_object, var_type_offset));
3789   __ MaybeUnpoisonHeapReference(temp);
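  // Only the low 16 bits of the primitive type field are compared (cmpw); that is where the
  // Primitive::Type value is stored.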
3790   __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3791   __ j(kNotEqual, slow_path->GetEntryLabel());
3792 }
3793 
3794 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3795                                           Register temp,
3796                                           SlowPathCode* slow_path,
3797                                           X86Assembler* assembler) {
3798   LocationSummary* locations = invoke->GetLocations();
3799   Register vh_object = locations->InAt(0).AsRegister<Register>();
3800   mirror::VarHandle::AccessMode access_mode =
3801       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3802 
3803   GenerateVarHandleAccessModeCheck(vh_object,
3804                                    access_mode,
3805                                    slow_path,
3806                                    assembler);
3807 
3808   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3809   switch (expected_coordinates_count) {
3810     case 0u:
3811       GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3812       break;
3813     case 1u: {
3814       GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
3815       break;
3816     }
3817     default:
3818       LOG(FATAL) << "Unexpected coordinates count: " << expected_coordinates_count;
3819       UNREACHABLE();
3820   }
3821 
3822   // Check the return type and varType parameters.
3823   mirror::VarHandle::AccessModeTemplate access_mode_template =
3824       mirror::VarHandle::GetAccessModeTemplate(access_mode);
3825   DataType::Type type = invoke->GetType();
3826 
3827   switch (access_mode_template) {
3828     case mirror::VarHandle::AccessModeTemplate::kGet:
3829       // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3830       // are also checked later by a HCheckCast node as an additional check.
3831       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3832       break;
3833     case mirror::VarHandle::AccessModeTemplate::kSet:
3834     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3835       uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3836       DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3837 
3838       // Check the varType.primitiveType against the type of the value we're trying to set.
3839       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3840       if (value_type == DataType::Type::kReference) {
3841         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3842 
3843         // If the value type is a reference, check it against the varType.
3844         GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3845                                    temp,
3846                                    Address(vh_object, var_type_offset),
3847                                    slow_path,
3848                                    assembler);
3849       }
3850       break;
3851     }
3852     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3853     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3854       uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3855       uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3856       DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3857       DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3858 
3859       // Check the varType.primitiveType against the type of the expected value.
3860       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3861       if (value_type == DataType::Type::kReference) {
3862         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3863 
3864         // If the value type is a reference, check both the expected and the new value against
3865         // the varType.
3866         GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3867                                    temp,
3868                                    Address(vh_object, var_type_offset),
3869                                    slow_path,
3870                                    assembler);
3871         GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3872                                    temp,
3873                                    Address(vh_object, var_type_offset),
3874                                    slow_path,
3875                                    assembler);
3876       }
3877       break;
3878     }
3879   }
3880 }
3881 
3882 // This method loads the address components (base + offset) of the field referenced by a field
3883 // VarHandle. The return value is the register containing the object's reference (for an
3884 // instance field) or the declaring class (for a static field); the declaring class is stored
3885 // in the `temp` register. The field's offset is loaded into the `offset` register.
3886 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3887                                                 CodeGeneratorX86* codegen,
3888                                                 Register temp,
3889                                                 /*out*/ Register offset) {
3890   X86Assembler* assembler = codegen->GetAssembler();
3891   LocationSummary* locations = invoke->GetLocations();
3892   const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3893   const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3894   const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3895   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3896 
3897   // Load the ArtField* and the offset.
3898   __ movl(temp, Address(varhandle_object, artfield_offset));
3899   __ movl(offset, Address(temp, offset_offset));
3900   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3901   if (expected_coordinates_count == 0) {
3902     // For static fields, load the declaring class
3903     InstructionCodeGeneratorX86* instr_codegen =
3904         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3905     instr_codegen->GenerateGcRootFieldLoad(invoke,
3906                                            Location::RegisterLocation(temp),
3907                                            Address(temp, declaring_class_offset),
3908                                            /* fixup_label= */ nullptr,
3909                                            codegen->GetCompilerReadBarrierOption());
3910     return temp;
3911   }
3912 
3913   // For instance fields, return the register containing the object.
3914   DCHECK_EQ(expected_coordinates_count, 1u);
3915 
3916   return locations->InAt(1).AsRegister<Register>();
3917 }
3918 
3919 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
3920   // The only read barrier implementation supporting the
3921   // VarHandleGet intrinsic is the Baker-style read barriers.
3922   if (codegen->EmitNonBakerReadBarrier()) {
3923     return;
3924   }
3925 
3926   if (!HasVarHandleIntrinsicImplementation(invoke)) {
3927     return;
3928   }
3929 
3930   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3931   LocationSummary* locations = new (allocator) LocationSummary(
3932       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3933   locations->SetInAt(0, Location::RequiresRegister());
3934   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3935   if (expected_coordinates_count == 1u) {
3936     // For instance fields, this is the source object.
3937     locations->SetInAt(1, Location::RequiresRegister());
3938   }
3939   locations->AddTemp(Location::RequiresRegister());
3940 
3941   DataType::Type type = invoke->GetType();
3942   switch (DataType::Kind(type)) {
3943     case DataType::Type::kInt64:
3944       locations->AddTemp(Location::RequiresRegister());
3945       if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3946         // We need an XmmRegister for Int64 to ensure an atomic load
3947         locations->AddTemp(Location::RequiresFpuRegister());
3948       }
3949       FALLTHROUGH_INTENDED;
3950     case DataType::Type::kInt32:
3951     case DataType::Type::kReference:
3952       locations->SetOut(Location::RequiresRegister());
3953       break;
3954     default:
3955       DCHECK(DataType::IsFloatingPointType(type));
3956       locations->AddTemp(Location::RequiresRegister());
3957       locations->SetOut(Location::RequiresFpuRegister());
3958   }
3959 }
3960 
3961 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3962   // The only read barrier implementation supporting the
3963   // VarHandleGet intrinsic is the Baker-style read barriers.
3964   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
3965 
3966   X86Assembler* assembler = codegen->GetAssembler();
3967   LocationSummary* locations = invoke->GetLocations();
3968   DataType::Type type = invoke->GetType();
3969   DCHECK_NE(type, DataType::Type::kVoid);
3970   Register temp = locations->GetTemp(0).AsRegister<Register>();
3971   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3972   codegen->AddSlowPath(slow_path);
3973 
3974   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3975 
3976   Location out = locations->Out();
3977   // Use 'out' to hold the field offset if it is a core register; otherwise use the extra temp.
3978   Register offset =
3979       out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3980 
3981   // Get the field referenced by the VarHandle. The returned register contains the object
3982   // reference or the declaring class. The field offset will be placed in 'offset'. For static
3983   // fields, the declaring class will be placed in the 'temp' register.
3984   Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3985   Address field_addr(ref, offset, TIMES_1, 0);
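  // The field lives at `ref + offset`; `offset` is already a byte offset, hence scale TIMES_1.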
3986 
3987   // Load the value from the field
3988   if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3989     codegen->GenerateReferenceLoadWithBakerReadBarrier(
3990         invoke, out, ref, field_addr, /* needs_null_check= */ false);
3991   } else if (type == DataType::Type::kInt64 &&
3992              invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3993     XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3994     codegen->LoadFromMemoryNoBarrier(
3995         type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
3996   } else {
3997     codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3998   }
3999 
4000   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
4001       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
4002     // Load fence to prevent load-load reordering.
4003     // Note that this is a no-op, thanks to the x86 memory model.
4004     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4005   }
4006 
4007   __ Bind(slow_path->GetExitLabel());
4008 }
4009 
4010 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
4011   CreateVarHandleGetLocations(invoke, codegen_);
4012 }
4013 
4014 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
4015   GenerateVarHandleGet(invoke, codegen_);
4016 }
4017 
4018 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4019   CreateVarHandleGetLocations(invoke, codegen_);
4020 }
4021 
4022 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4023   GenerateVarHandleGet(invoke, codegen_);
4024 }
4025 
4026 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4027   CreateVarHandleGetLocations(invoke, codegen_);
4028 }
4029 
4030 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4031   GenerateVarHandleGet(invoke, codegen_);
4032 }
4033 
4034 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4035   CreateVarHandleGetLocations(invoke, codegen_);
4036 }
4037 
4038 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4039   GenerateVarHandleGet(invoke, codegen_);
4040 }
4041 
4042 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4043   // The only read barrier implementation supporting the
4044   // VarHandleGet intrinsic is the Baker-style read barriers.
4045   if (codegen->EmitNonBakerReadBarrier()) {
4046     return;
4047   }
4048 
4049   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4050     return;
4051   }
4052 
4053   // The last argument should be the value we intend to set.
4054   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4055   HInstruction* value = invoke->InputAt(value_index);
4056   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4057   bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
4058   if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
4059     // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
4060     // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
4061     // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
4062     // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4063     return;
4064   }
4065 
4066   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4067   LocationSummary* locations = new (allocator) LocationSummary(
4068       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4069   locations->SetInAt(0, Location::RequiresRegister());
4070   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4071   if (expected_coordinates_count == 1u) {
4072     // For instance fields, this is the source object
4073     locations->SetInAt(1, Location::RequiresRegister());
4074   }
4075 
4076   switch (value_type) {
4077     case DataType::Type::kBool:
4078     case DataType::Type::kInt8:
4079     case DataType::Type::kUint8:
4080       // Ensure the value is in a byte register
4081       locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
4082       break;
4083     case DataType::Type::kInt16:
4084     case DataType::Type::kUint16:
4085     case DataType::Type::kInt32:
4086       locations->SetInAt(value_index, Location::RegisterOrConstant(value));
4087       break;
4088     case DataType::Type::kInt64:
4089       // We only handle constant non-atomic int64 values.
4090       DCHECK(value->IsConstant());
4091       locations->SetInAt(value_index, Location::ConstantLocation(value));
4092       break;
4093     case DataType::Type::kReference:
4094       locations->SetInAt(value_index, Location::RequiresRegister());
4095       break;
4096     default:
4097       DCHECK(DataType::IsFloatingPointType(value_type));
4098       if (needs_atomicity && value_type == DataType::Type::kFloat64) {
4099         locations->SetInAt(value_index, Location::RequiresFpuRegister());
4100       } else {
4101         locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
4102       }
4103   }
4104 
4105   locations->AddTemp(Location::RequiresRegister());
4106   // This temporary register is also used for the card in MarkGCCard, so it must be a byte register.
4107   locations->AddTemp(Location::RegisterLocation(EAX));
4108   if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
4109     // For static reference fields, we need another temporary for the declaring class. We set it
4110     // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
4111     locations->AddTemp(Location::RequiresRegister());
4112   }
4113 }
4114 
4115 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4116   // The only read barrier implementation supporting the
4117   // VarHandleGet intrinsic is the Baker-style read barriers.
4118   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4119 
4120   X86Assembler* assembler = codegen->GetAssembler();
4121   LocationSummary* locations = invoke->GetLocations();
4122   // The value we want to set is the last argument
4123   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4124   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4125   Register temp = locations->GetTemp(0).AsRegister<Register>();
4126   Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4127   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4128   codegen->AddSlowPath(slow_path);
4129 
4130   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4131 
4132   // For static reference fields, we need another temporary for the declaring class. But since
4133   // for instance fields the object is in a separate register, it is safe to use the first
4134   // temporary register for GenerateVarHandleFieldReference.
4135   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4136   if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4137     temp = locations->GetTemp(2).AsRegister<Register>();
4138   }
4139 
4140   Register offset = temp2;
4141   // Get the field referenced by the VarHandle. The returned register contains the object
4142   // reference or the declaring class. The field offset will be placed in 'offset'. For static
4143   // fields, the declaring class will be placed in the 'temp' register.
4144   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4145 
4146   bool is_volatile = false;
4147   switch (invoke->GetIntrinsic()) {
4148     case Intrinsics::kVarHandleSet:
4149     case Intrinsics::kVarHandleSetOpaque:
4150       // The only constraint for setOpaque is bitwise atomicity (atomically setting 64-bit
4151       // values), but we do not handle Int64 values here because we would need to place them in
4152       // a register pair. If the slow path is taken, the ParallelMove might fail to move the
4153       // register pair in case of an overlap (e.g., <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4154       break;
4155     case Intrinsics::kVarHandleSetRelease:
4156       // setRelease needs to ensure atomicity too. See the above comment.
4157       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4158       break;
4159     case Intrinsics::kVarHandleSetVolatile:
4160       is_volatile = true;
4161       break;
4162     default:
4163       LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4164   }
4165 
4166   InstructionCodeGeneratorX86* instr_codegen =
4167         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4168   // Store the value to the field
4169   instr_codegen->HandleFieldSet(
4170       invoke,
4171       value_index,
4172       value_type,
4173       Address(reference, offset, TIMES_1, 0),
4174       reference,
4175       is_volatile,
4176       /* value_can_be_null */ true,
4177       // Value can be null, and this write barrier is not being relied on for other sets.
4178       value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4179                                                  WriteBarrierKind::kDontEmit);
4180 
4181   __ Bind(slow_path->GetExitLabel());
4182 }
4183 
4184 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4185   CreateVarHandleSetLocations(invoke, codegen_);
4186 }
4187 
4188 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4189   GenerateVarHandleSet(invoke, codegen_);
4190 }
4191 
4192 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4193   CreateVarHandleSetLocations(invoke, codegen_);
4194 }
4195 
4196 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4197   GenerateVarHandleSet(invoke, codegen_);
4198 }
4199 
4200 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4201   CreateVarHandleSetLocations(invoke, codegen_);
4202 }
4203 
4204 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4205   GenerateVarHandleSet(invoke, codegen_);
4206 }
4207 
4208 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4209   CreateVarHandleSetLocations(invoke, codegen_);
4210 }
4211 
4212 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4213   GenerateVarHandleSet(invoke, codegen_);
4214 }
4215 
4216 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4217   // The only read barrier implementation supporting the
4218   // VarHandleGet intrinsic is the Baker-style read barriers.
4219   if (codegen->EmitNonBakerReadBarrier()) {
4220     return;
4221   }
4222 
4223   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4224     return;
4225   }
4226 
4227   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4228   uint32_t value_index = number_of_arguments - 1;
4229   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4230 
4231   if (DataType::Is64BitType(value_type)) {
4232     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4233     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4234     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4235     // <EAX, EBX> to <EBX, ECX>).
4236     return;
4237   }
4238 
4239   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4240   LocationSummary* locations = new (allocator) LocationSummary(
4241       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4242   locations->AddTemp(Location::RequiresRegister());
4243   locations->AddTemp(Location::RequiresRegister());
4244   // We use this temporary for the card, so we need a byte register
4245   locations->AddTemp(Location::RegisterLocation(EBX));
4246   locations->SetInAt(0, Location::RequiresRegister());
4247   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4248     // For instance fields, this is the source object
4249     locations->SetInAt(1, Location::RequiresRegister());
4250   } else {
4251     // For static fields, we need another temp because one will be busy with the declaring class.
4252     locations->AddTemp(Location::RequiresRegister());
4253   }
4254   if (value_type == DataType::Type::kFloat32) {
4255     locations->AddTemp(Location::RegisterLocation(EAX));
4256     locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4257     locations->SetOut(Location::RequiresFpuRegister());
4258   } else {
4259     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4260     locations->SetOut(Location::RegisterLocation(EAX));
4261   }
4262 }
4263 
4264 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4265   // The only read barrier implementation supporting the
4266   // VarHandleGet intrinsic is the Baker-style read barriers.
4267   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4268 
4269   X86Assembler* assembler = codegen->GetAssembler();
4270   LocationSummary* locations = invoke->GetLocations();
4271   // The value we want to set is the last argument
4272   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4273   Location value = locations->InAt(value_index);
4274   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4275   Register temp = locations->GetTemp(1).AsRegister<Register>();
4276   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4277   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4278   codegen->AddSlowPath(slow_path);
4279 
4280   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4281 
4282   Register offset = locations->GetTemp(0).AsRegister<Register>();
4283   // Get the field referenced by the VarHandle. The returned register contains the object
4284   // reference or the declaring class. The field offset will be placed in 'offset'. For static
4285   // fields, the declaring class will be placed in the 'temp' register.
4286   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4287   Address field_addr(reference, offset, TIMES_1, 0);
4288 
4289   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4290     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4291   }
4292 
4293   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4294   // For static fields, we need another temporary for the declaring class. But since for instance
4295   // fields the object is in a separate register, it is safe to use the first temporary register.
4296   temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4297   // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
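  // For example, `xchgl EAX, [reference + offset]` atomically stores the new value and returns
  // the previous field value in the same register.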
4298   switch (value_type) {
4299     case DataType::Type::kBool:
4300       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4301       __ movzxb(locations->Out().AsRegister<Register>(),
4302                 locations->Out().AsRegister<ByteRegister>());
4303       break;
4304     case DataType::Type::kInt8:
4305       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4306       __ movsxb(locations->Out().AsRegister<Register>(),
4307                 locations->Out().AsRegister<ByteRegister>());
4308       break;
4309     case DataType::Type::kUint16:
4310       __ xchgw(value.AsRegister<Register>(), field_addr);
4311       __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4312       break;
4313     case DataType::Type::kInt16:
4314       __ xchgw(value.AsRegister<Register>(), field_addr);
4315       __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4316       break;
4317     case DataType::Type::kInt32:
4318       __ xchgl(value.AsRegister<Register>(), field_addr);
4319       break;
4320     case DataType::Type::kFloat32:
4321       codegen->Move32(Location::RegisterLocation(EAX), value);
4322       __ xchgl(EAX, field_addr);
4323       __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4324       break;
4325     case DataType::Type::kReference: {
4326       if (codegen->EmitBakerReadBarrier()) {
4327         // Need to make sure the reference stored in the field is a to-space
4328         // one before attempting the CAS or the CAS could fail incorrectly.
4329         codegen->GenerateReferenceLoadWithBakerReadBarrier(
4330             invoke,
4331             // Unused, used only as a "temporary" within the read barrier.
4332             Location::RegisterLocation(temp),
4333             reference,
4334             field_addr,
4335             /* needs_null_check= */ false,
4336             /* always_update_field= */ true,
4337             &temp2);
4338       }
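      // Storing a reference may create an old-to-young pointer; mark the card for `reference`
      // so that the generational GC will scan this object.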
4339       codegen->MarkGCCard(temp, temp2, reference);
4340       if (kPoisonHeapReferences) {
4341         __ movl(temp, value.AsRegister<Register>());
4342         __ PoisonHeapReference(temp);
4343         __ xchgl(temp, field_addr);
4344         __ UnpoisonHeapReference(temp);
4345         __ movl(locations->Out().AsRegister<Register>(), temp);
4346       } else {
4347         __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4348       }
4349       break;
4350     }
4351     default:
4352       LOG(FATAL) << "Unexpected type: " << value_type;
4353       UNREACHABLE();
4354   }
4355 
4356   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4357     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4358   }
4359 
4360   __ Bind(slow_path->GetExitLabel());
4361 }
4362 
4363 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4364   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4365 }
4366 
4367 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4368   GenerateVarHandleGetAndSet(invoke, codegen_);
4369 }
4370 
4371 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4372   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4373 }
4374 
4375 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4376   GenerateVarHandleGetAndSet(invoke, codegen_);
4377 }
4378 
4379 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4380   CreateVarHandleGetAndSetLocations(invoke, codegen_);
4381 }
4382 
4383 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4384   GenerateVarHandleGetAndSet(invoke, codegen_);
4385 }
4386 
4387 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4388                                                             CodeGeneratorX86* codegen) {
4389   // The only read barrier implementation supporting the
4390   // VarHandleGet intrinsic is the Baker-style read barriers.
4391   if (codegen->EmitNonBakerReadBarrier()) {
4392     return;
4393   }
4394 
4395   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4396     return;
4397   }
4398 
4399   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4400   uint32_t expected_value_index = number_of_arguments - 2;
4401   uint32_t new_value_index = number_of_arguments - 1;
4402   DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4403   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4404 
4405   if (DataType::Is64BitType(value_type)) {
4406     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4407     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4408     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4409     // <EAX, EBX> to <EBX, ECX>).
4410     return;
4411   }
4412 
4413   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4414   LocationSummary* locations = new (allocator) LocationSummary(
4415       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4416   locations->AddTemp(Location::RequiresRegister());
4417   locations->AddTemp(Location::RequiresRegister());
4418   // We use this temporary for the card, so we need a byte register
4419   locations->AddTemp(Location::RegisterLocation(EBX));
4420   locations->SetInAt(0, Location::RequiresRegister());
4421   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4422     // For instance fields, this is the source object
4423     locations->SetInAt(1, Location::RequiresRegister());
4424   } else {
4425     // For static fields, we need another temp because one will be busy with the declaring class.
4426     locations->AddTemp(Location::RequiresRegister());
4427   }
4428   if (DataType::IsFloatingPointType(value_type)) {
4429     // We need EAX for placing the expected value
4430     locations->AddTemp(Location::RegisterLocation(EAX));
4431     locations->SetInAt(new_value_index,
4432                        Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4433     locations->SetInAt(expected_value_index,
4434                        Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4435   } else {
4436     // Ensure it's in a byte register
4437     locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4438     locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4439   }
4440 
4441   mirror::VarHandle::AccessModeTemplate access_mode_template =
4442       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4443 
4444   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4445       value_type == DataType::Type::kFloat32) {
4446     locations->SetOut(Location::RequiresFpuRegister());
4447   } else {
4448     locations->SetOut(Location::RegisterLocation(EAX));
4449   }
4450 }
4451 
4452 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4453   // The only read barrier implementation supporting the
4454   // VarHandleGet intrinsic is the Baker-style read barriers.
4455   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4456 
4457   X86Assembler* assembler = codegen->GetAssembler();
4458   LocationSummary* locations = invoke->GetLocations();
4459   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4460   uint32_t expected_value_index = number_of_arguments - 2;
4461   uint32_t new_value_index = number_of_arguments - 1;
4462   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4463   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4464   Location expected_value = locations->InAt(expected_value_index);
4465   Location new_value = locations->InAt(new_value_index);
4466   Register offset = locations->GetTemp(0).AsRegister<Register>();
4467   Register temp = locations->GetTemp(1).AsRegister<Register>();
4468   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4469   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4470   codegen->AddSlowPath(slow_path);
4471 
4472   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4473 
4474   // Get the field referenced by the VarHandle. The returned register contains the object
4475   // reference or the declaring class. The field offset will be placed in 'offset'. For static
4476   // fields, the declaring class will be placed in the 'temp' register.
4477   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4478 
4479   uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4480   // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4481   // first temporary contains the declaring class so we need another temporary. In case of an
4482   // instance field, the object comes in a separate register so it's safe to use the first temp.
4483   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4484   DCHECK_NE(temp, reference);
4485 
4486   // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4487   // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4488   // barriers at this time.
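  // `lock cmpxchg` contract: if EAX equals the memory operand, the new value is stored and ZF is
  // set; otherwise the current memory value is loaded into EAX and ZF is cleared.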

  mirror::VarHandle::AccessModeTemplate access_mode_template =
      mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
  bool is_cmpxchg =
      access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;

  if (type == DataType::Type::kReference) {
    GenReferenceCAS(
        invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
  } else {
    Location out = locations->Out();
    GenPrimitiveCAS(
        type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
  }

  __ Bind(slow_path->GetExitLabel());
}
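
// Both families of intrinsics funnel into the helper above (a sketch of the semantics):
//   compareAndSet(expected, new)      -> boolean: whether the field was updated.
//   compareAndExchange(expected, new) -> the witness value (old field contents), which equals
//                                        `expected` iff the exchange happened.
// The weak variants are allowed to fail spuriously, but since `lock cmpxchg` never does, they
// simply share this strong implementation.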

void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
  GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}

static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
  // The only read barrier implementation supporting the VarHandle intrinsics is the
  // Baker-style read barrier.
  if (codegen->EmitNonBakerReadBarrier()) {
    return;
  }

  if (!HasVarHandleIntrinsicImplementation(invoke)) {
    return;
  }

  // The last argument should be the value we intend to set.
  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
  if (DataType::Is64BitType(value_type)) {
    // We avoid the case of an Int64/Float64 value because we would need to place it in a register
    // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
    // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
    // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
    return;
  }

  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
  LocationSummary* locations = new (allocator) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->SetInAt(0, Location::RequiresRegister());
  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  if (expected_coordinates_count == 1u) {
    // For instance fields, this is the source object
    locations->SetInAt(1, Location::RequiresRegister());
  } else {
    // For static fields, we need another temp because one will be busy with the declaring class.
    locations->AddTemp(Location::RequiresRegister());
  }

  if (DataType::IsFloatingPointType(value_type)) {
    locations->AddTemp(Location::RequiresFpuRegister());
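    // EAX will hold the expected (old) value for the `lock cmpxchg` retry loop that emulates the
    // floating-point add (see the kFloat32 case in GenerateVarHandleGetAndAdd below).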
    locations->AddTemp(Location::RegisterLocation(EAX));
    locations->SetInAt(value_index, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresFpuRegister());
  } else {
    // xadd updates the register argument with the old value. ByteRegister required for xaddb.
    locations->SetInAt(value_index, Location::RegisterLocation(EAX));
    locations->SetOut(Location::RegisterLocation(EAX));
  }
}

static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
  // The only read barrier implementation supporting the VarHandle intrinsics is the
  // Baker-style read barrier.
  DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);

  X86Assembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
  uint32_t value_index = number_of_arguments - 1;
  DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
  DCHECK_EQ(type, invoke->GetType());
  Location value_loc = locations->InAt(value_index);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen->AddSlowPath(slow_path);

  GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);

  Register offset = locations->GetTemp(1).AsRegister<Register>();
  // Get the field referred to by the VarHandle. The returned register contains the object
  // reference or the declaring class. The field offset will be placed in 'offset'. For static
  // fields, the declaring class will be placed in the 'temp' register.
  Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);

  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
  DCHECK_NE(temp, reference);
  Address field_addr(reference, offset, TIMES_1, 0);

  switch (type) {
    case DataType::Type::kInt8:
      __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
      __ movsxb(locations->Out().AsRegister<Register>(),
                locations->Out().AsRegister<ByteRegister>());
      break;
    case DataType::Type::kInt16:
      __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
      __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
      break;
    case DataType::Type::kUint16:
      __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
      __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
      break;
    case DataType::Type::kInt32:
      __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
      break;
    case DataType::Type::kFloat32: {
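      // There is no atomic x86 floating-point add, so emulate getAndAdd with a plain load,
      // an addss and a `lock cmpxchg` retry loop.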
      Location temp_float =
          (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
      DCHECK(temp_float.IsFpuRegister());
      Location eax = Location::RegisterLocation(EAX);
      NearLabel try_again;
      __ Bind(&try_again);
      __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
      __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
      __ addss(temp_float.AsFpuRegister<XmmRegister>(),
               value_loc.AsFpuRegister<XmmRegister>());
      GenPrimitiveLockedCmpxchg(type,
                                codegen,
                                /* expected_value= */ eax,
                                /* new_value= */ temp_float,
                                reference,
                                offset,
                                temp);
      __ j(kNotZero, &try_again);

      // The old value is present in EAX.
      codegen->Move32(locations->Out(), eax);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type: " << type;
      UNREACHABLE();
  }

  __ Bind(slow_path->GetExitLabel());
}
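
// Illustrative only: the kFloat32 path above is semantically a CAS loop like the following C++
// sketch (hypothetical helper, not part of the generated code):
//
//   #include <atomic>
//   #include <cstring>
//
//   float GetAndAddFloat(std::atomic<uint32_t>* field, float operand) {
//     uint32_t old_bits = field->load();
//     for (;;) {
//       float old_value;
//       std::memcpy(&old_value, &old_bits, sizeof(old_value));  // movd
//       float new_value = old_value + operand;                  // addss
//       uint32_t new_bits;
//       std::memcpy(&new_bits, &new_value, sizeof(new_bits));
//       // lock cmpxchg; on failure old_bits is refreshed and we retry.
//       if (field->compare_exchange_weak(old_bits, new_bits)) {
//         return old_value;
//       }
//     }
//   }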

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
  // The only read barrier implementation supporting the VarHandle intrinsics is the
  // Baker-style read barrier.
  if (codegen->EmitNonBakerReadBarrier()) {
    return;
  }

  if (!HasVarHandleIntrinsicImplementation(invoke)) {
    return;
  }

  // The last argument should be the value we intend to set.
  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
  if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
    // We avoid the case of an Int64 value because we would need to place it in a register pair.
    // If the slow path is taken, the ParallelMove might fail to move the pair according to the
    // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
    // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
    return;
  }

  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
  LocationSummary* locations = new (allocator) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // We need a byte-addressable register temp (EBX) to hold the result of the bitwise operation,
  // since the 8-bit `cmpxchgb` can only take a byte register as its source operand.
  locations->AddTemp(Location::RegisterLocation(EBX));
  locations->AddTemp(Location::RequiresRegister());
  locations->SetInAt(0, Location::RequiresRegister());
  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  if (expected_coordinates_count == 1u) {
    // For instance fields, this is the source object
    locations->SetInAt(1, Location::RequiresRegister());
  } else {
    // For static fields, we need another temp because one will be busy with the declaring class.
    locations->AddTemp(Location::RequiresRegister());
  }

  locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

static void GenerateBitwiseOp(HInvoke* invoke,
                              CodeGeneratorX86* codegen,
                              Register left,
                              Register right) {
  X86Assembler* assembler = codegen->GetAssembler();

  switch (invoke->GetIntrinsic()) {
    case Intrinsics::kVarHandleGetAndBitwiseOr:
    case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
      __ orl(left, right);
      break;
    case Intrinsics::kVarHandleGetAndBitwiseXor:
    case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
      __ xorl(left, right);
      break;
    case Intrinsics::kVarHandleGetAndBitwiseAnd:
    case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
      __ andl(left, right);
      break;
    default:
      LOG(FATAL) << "Unexpected intrinsic: " << invoke->GetIntrinsic();
      UNREACHABLE();
  }
}

static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
  // The only read barrier implementation supporting the VarHandle intrinsics is the
  // Baker-style read barrier.
  DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);

  X86Assembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
  DCHECK_EQ(type, invoke->GetType());
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen->AddSlowPath(slow_path);

  GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);

  Register offset = locations->GetTemp(1).AsRegister<Register>();
  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  // For a static field, we need another temporary because the first one contains the declaring
  // class.
  Register reference =
      (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
  // Get the field referred to by the VarHandle. The returned register contains the object
  // reference or the declaring class. The field offset will be placed in 'offset'. For static
  // fields, the declaring class will be placed in the 'reference' register.
  reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
  DCHECK_NE(temp, reference);
  Address field_addr(reference, offset, TIMES_1, 0);

  Register out = locations->Out().AsRegister<Register>();
  DCHECK_EQ(out, EAX);

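  // The *Release variants need store-release ordering before the update and the *Acquire variants
  // need load-acquire ordering after it. On x86 these barrier calls are expected to act mostly as
  // scheduling constraints (the hardware memory model already gives acquire/release ordering),
  // but we keep them explicit to document the required ordering.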
  if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
    codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
  }

  NearLabel try_again;
  __ Bind(&try_again);
  // Place the expected value in EAX for cmpxchg
  codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
  codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
  GenerateBitwiseOp(invoke, codegen, temp, out);
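  // 'temp' now holds the proposed new value (operand OP old value), while EAX still holds the
  // value loaded above, which `lock cmpxchg` uses as the expected value.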
  GenPrimitiveLockedCmpxchg(type,
                            codegen,
                            /* expected_value= */ locations->Out(),
                            /* new_value= */ locations->GetTemp(0),
                            reference,
                            offset);
  // If the cmpxchg failed, another thread changed the value so try again.
  __ j(kNotZero, &try_again);

  // The old value is present in EAX.

  if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
    codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
  }

  __ Bind(slow_path->GetExitLabel());
}
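
// Illustrative only: for kVarHandleGetAndBitwiseOr the loop above corresponds to a fetch-OR
// implemented with a CAS loop, e.g. (hypothetical C++ sketch, not generated code):
//
//   #include <atomic>
//   #include <cstdint>
//
//   int32_t GetAndBitwiseOr(std::atomic<int32_t>* field, int32_t operand) {
//     int32_t old_value = field->load();
//     // On failure, compare_exchange_weak refreshes old_value with the current field contents.
//     while (!field->compare_exchange_weak(old_value, old_value | operand)) {
//     }
//     return old_value;
//   }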

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  X86Assembler* assembler = codegen->GetAssembler();
  XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
  if (invoke->GetType() == DataType::Type::kFloat32) {
    __ vfmadd213ss(left, right, accumulator);
  } else {
    DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
    __ vfmadd213sd(left, right, accumulator);
  }
}
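
// Note: the "213" form computes operand1 = operand1 * operand2 + operand3, i.e.
// left = left * right + accumulator, matching Math.fma(a, b, c) = a * b + c with the result
// produced in the first input, which is constrained to alias the output location above.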

void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art