/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
#include "well_known_classes.h"

namespace art HIDDEN {

namespace x86 {

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

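// Shorthand used throughout this file: `__` expands to the assembler in scope, so the
// code generation sequences below read like assembly listings.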
#define __ assembler->

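// Computes `dest = base + pos * sizeof(type) + data_offset` with a single LEA, either
// folding a constant `pos` into the displacement or using a scaled-index addressing mode
// when `pos` is in a register.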
static void GenArrayAddress(X86Assembler* assembler,
                            Register dest,
                            Register base,
                            Location pos,
                            DataType::Type type,
                            uint32_t data_offset) {
  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
  } else {
    const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
    __ leal(dest, Address(base, pos.AsRegister<Register>(), scale_factor, data_offset));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitBakerReadBarrier());
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    X86Assembler* assembler = x86_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
    Location length = locations->InAt(4);

    const DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
    Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
    Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
    Register value = locations->GetTemp(3).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
    GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);

    NearLabel loop;
    __ Bind(&loop);
    __ movl(value, Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(value);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(value);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(value);
    __ movl(Address(dst_curr_addr, 0), value);
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

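// Moves a 64-bit FP value into a core register pair (or a 32-bit FP value into a single
// register). x86-32 has no 64-bit GPR, so the double is split: the low word is extracted
// with MOVD and the high word by first shifting the XMM lane right by 32 bits.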
static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

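// Used by the byte-reverse intrinsics below: BSWAP operates in place, so the output
// location simply reuses the first input register.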
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

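// For kInt16 the value arrives sign-extended in a 32-bit register, so BSWAP reverses all
// four bytes and the arithmetic shift by 16 drops the stale low half while re-sign-extending
// the reversed 16-bit result.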
static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

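// Emits a single SSE4.1 ROUNDSD. The immediate selects the rounding mode:
// 0 = round to nearest even (rint), 1 = round toward -inf (floor), 2 = round toward +inf
// (ceil), matching the call sites below.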
static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(
          static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(1, Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

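// Calls a quick runtime entrypoint for a libcore Math function. The arguments arrive in
// XMM0/XMM1, but the 32-bit entrypoints expect them on the stack and return the result on
// the x87 FP stack, so the arguments are spilled below ESP and the x87 result is stored
// back to memory and reloaded into XMM0.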
static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenLowestOneBit(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            bool is_long,
                            HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Location out_loc = locations->Out();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      if (is_long) {
        __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
        __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
      } else {
        __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
      }
      return;
    }
    // Nonzero value.
    value = is_long ? CTZ(static_cast<uint64_t>(value))
                    : CTZ(static_cast<uint32_t>(value));
    if (is_long) {
      if (value >= 32) {
        int shift = value - 32;
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
      } else {
        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
      }
    } else {
      codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
    }
    return;
  }
  // Handle non constant case
  if (is_long) {
    DCHECK(src.IsRegisterPair());
    Register src_lo = src.AsRegisterPairLow<Register>();
    Register src_hi = src.AsRegisterPairHigh<Register>();

    Register out_lo = out_loc.AsRegisterPairLow<Register>();
    Register out_hi = out_loc.AsRegisterPairHigh<Register>();

    __ movl(out_lo, src_lo);
    __ movl(out_hi, src_hi);

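    // Compute -value into out_hi:out_lo: negate the low word, propagate the borrow into the
    // high word with ADC, then negate the high word. ANDing with the original value below
    // isolates the lowest set bit (value & -value).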
    __ negl(out_lo);
    __ adcl(out_hi, Immediate(0));
    __ negl(out_hi);

    __ andl(out_lo, src_lo);
    __ andl(out_hi, src_hi);
  } else {
    if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
      Register out = out_loc.AsRegister<Register>();
      __ blsi(out, src.AsRegister<Register>());
    } else {
      Register out = out_loc.AsRegister<Register>();
      // Do tmp & -tmp
      if (src.IsRegister()) {
        __ movl(out, src.AsRegister<Register>());
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(out, Address(ESP, src.GetStackIndex()));
      }
      __ negl(out);

      if (src.IsRegister()) {
        __ andl(out, src.AsRegister<Register>());
      } else {
        __ andl(out, Address(ESP, src.GetStackIndex()));
      }
    }
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
}
void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
}

void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (invoke->GetBlock()->GetGraph()->GetAllocator())
      LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVS{B,W,L}, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

template <typename LhsType>
static void EmitCmplJLess(X86Assembler* assembler,
                          LhsType lhs,
                          Location rhs,
                          Label* label) {
  static_assert(std::is_same_v<LhsType, Register> || std::is_same_v<LhsType, Address>);
  if (rhs.IsConstant()) {
    int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
    __ cmpl(lhs, Immediate(rhs_constant));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
  __ j(kLess, label);
}

static void CheckSystemArrayCopyPosition(X86Assembler* assembler,
                                         Register array,
                                         Location pos,
                                         Location length,
                                         SlowPathCode* slow_path,
                                         Register temp,
                                         bool length_is_array_length,
                                         bool position_sign_checked) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_array_length) {
        // Check that length(array) >= length.
        EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
      }
    } else {
      // Calculate length(array) - pos.
      // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
      // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
      __ movl(temp, Address(array, length_offset));
      __ subl(temp, Immediate(pos_const));

      // Check that (length(array) - pos) >= length.
      EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
    }
  } else if (length_is_array_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    if (!position_sign_checked) {
      __ testl(pos_reg, pos_reg);
      __ j(kLess, slow_path->GetEntryLabel());
    }

    // Calculate length(array) - pos.
    // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
    // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
    __ movl(temp, Address(array, length_offset));
    __ subl(temp, pos_reg);

    // Check that (length(array) - pos) >= length.
    EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
  }
}

static void SystemArrayCopyPrimitive(HInvoke* invoke,
                                     X86Assembler* assembler,
                                     CodeGeneratorX86* codegen,
                                     DataType::Type type) {
  LocationSummary* locations = invoke->GetLocations();
  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSB/W/L.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               src,
                               src_pos,
                               Location::RegisterLocation(count),
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckSystemArrayCopyPosition(assembler,
                               dest,
                               dest_pos,
                               Location::RegisterLocation(count),
                               slow_path,
                               src_base,
                               /*length_is_array_length=*/ false,
                               /*position_sign_checked=*/ false);

  // Okay, everything checks out. Finally time to do the copy.
  // Compute the element size and the offset of the array payload; both feed the address
  // computations below.
  const size_t data_size = DataType::Size(type);
  const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();

  GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
  GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);

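  // At this point ESI (src_base), EDI (dest_base) and ECX (count) hold exactly what the
  // REP MOVS instructions expect: source address, destination address and element count.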
  // Do the move.
  switch (type) {
    case DataType::Type::kInt8:
      __ rep_movsb();
      break;
    case DataType::Type::kUint16:
      __ rep_movsw();
      break;
    case DataType::Type::kInt32:
      __ rep_movsl();
      break;
    default:
      LOG(FATAL) << "Unexpected data type for intrinsic";
  }
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ ecx = str->klass_
    __ movl(ecx, Address(str, class_offset));
    // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if lengths and compression flags are equal, return false if they're not.
  // Two identical strings will always have same compression style since
  // compression style is decided on alloc.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if strings are empty. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  __ jecxz(&return_true);

  if (mirror::kUseStringCompression) {
    NearLabel string_uncompressed;
    // Extract length and differentiate between both compressed or both uncompressed.
    // Different compression style is cut above.
    __ shrl(ecx, Immediate(1));
    __ j(kCarrySet, &string_uncompressed);
    // Divide string length by 2, rounding up, and continue as if uncompressed.
    __ addl(ecx, Immediate(1));
    __ shrl(ecx, Immediate(1));
    __ Bind(&string_uncompressed);
  }
  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
  // divisible by 2.
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));

  // Assertions that must hold in order to compare strings 2 characters (uncompressed)
  // or 4 characters (compressed) at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(EAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber EDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses ECX as the counter.
  locations->AddTemp(Location::RegisterLocation(ECX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
  if (mirror::kUseStringCompression) {
    // Need another temporary to be able to save unflagged string length.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86Assembler* assembler,
                                  CodeGeneratorX86* codegen,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register string_obj = locations->InAt(0).AsRegister<Register>();
  Register search_value = locations->InAt(1).AsRegister<Register>();
  Register counter = locations->GetTemp(0).AsRegister<Register>();
  Register string_length = locations->GetTemp(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();
  // Only used when string compression feature is on.
  Register string_length_flagged;

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj, EDI);
  DCHECK_EQ(search_value, EAX);
  DCHECK_EQ(counter, ECX);
  DCHECK_EQ(out, EDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
  SlowPathCode* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != DataType::Type::kUint16) {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load the count field of the string containing the length and compression flag.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a zero-length check. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (mirror::kUseStringCompression) {
    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
    __ movl(string_length_flagged, string_length);
    // Extract the length and shift out the least significant bit used as compression flag.
    __ shrl(string_length, Immediate(1));
  }

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addl(string_obj, Immediate(value_offset));
  } else {
    Register start_index = locations->InAt(2).AsRegister<Register>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmovl(kGreater, counter, start_index);

    if (mirror::kUseStringCompression) {
      NearLabel modify_counter, offset_uncompressed_label;
      __ testl(string_length_flagged, Immediate(1));
      __ j(kNotZero, &offset_uncompressed_label);
      // Move to the start of the string: string_obj + value_offset + start_index.
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
      __ jmp(&modify_counter);

      // Move to the start of the string: string_obj + value_offset + 2 * start_index.
      __ Bind(&offset_uncompressed_label);
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

      // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
      // compare.
      __ Bind(&modify_counter);
    } else {
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
    }
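    // counter = string_length - start_index, i.e. the number of characters left to scan.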
    __ negl(counter);
    __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  if (mirror::kUseStringCompression) {
    NearLabel uncompressed_string_comparison;
    NearLabel comparison_done;
    __ testl(string_length_flagged, Immediate(1));
    __ j(kNotZero, &uncompressed_string_comparison);

    // Check if EAX (search_value) is ASCII.
    __ cmpl(search_value, Immediate(127));
    __ j(kGreater, &not_found_label);
    // Comparing byte-per-byte.
    __ repne_scasb();
    __ jmp(&comparison_done);

    // Everything is set up for repne scasw:
    //  * Comparison address in EDI.
    //  * Counter in ECX.
    __ Bind(&uncompressed_string_comparison);
    __ repne_scasw();
    __ Bind(&comparison_done);
  } else {
    __ repne_scasw();
  }
  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  NearLabel done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register byte_array = locations->InAt(0).AsRegister<Register>();
  __ testl(byte_array, byte_array);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  // No need to emit code checking whether `locations->InAt(2)` is a null
  // pointer, as callers of the native method
  //
  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
  //
  // all include a null check on `data` before calling that method.
  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1377 }
1378
VisitStringNewStringFromString(HInvoke * invoke)1379 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1380 LocationSummary* locations = new (allocator_) LocationSummary(
1381 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1382 InvokeRuntimeCallingConvention calling_convention;
1383 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1384 locations->SetOut(Location::RegisterLocation(EAX));
1385 }
1386
VisitStringNewStringFromString(HInvoke * invoke)1387 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1388 X86Assembler* assembler = GetAssembler();
1389 LocationSummary* locations = invoke->GetLocations();
1390
1391 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1392 __ testl(string_to_copy, string_to_copy);
1393 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1394 codegen_->AddSlowPath(slow_path);
1395 __ j(kEqual, slow_path->GetEntryLabel());
1396
1397 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1398 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1399 __ Bind(slow_path->GetExitLabel());
1400 }
1401
VisitStringGetCharsNoCheck(HInvoke * invoke)1402 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1403 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1404 LocationSummary* locations =
1405 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1406 locations->SetInAt(0, Location::RequiresRegister());
1407 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1408 // Place srcEnd in ECX to save a move below.
1409 locations->SetInAt(2, Location::RegisterLocation(ECX));
1410 locations->SetInAt(3, Location::RequiresRegister());
1411 locations->SetInAt(4, Location::RequiresRegister());
1412
1413 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1414   // There are not enough registers to also reserve ECX as a temporary, so it is saved and restored below.
1415 locations->AddTemp(Location::RegisterLocation(ESI));
1416 locations->AddTemp(Location::RegisterLocation(EDI));
1417 }
1418
VisitStringGetCharsNoCheck(HInvoke * invoke)1419 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1420 X86Assembler* assembler = GetAssembler();
1421 LocationSummary* locations = invoke->GetLocations();
1422
1423 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1424 // Location of data in char array buffer.
1425 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1426 // Location of char array data in string.
1427 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1428
1429 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1430 Register obj = locations->InAt(0).AsRegister<Register>();
1431 Location srcBegin = locations->InAt(1);
1432 int srcBegin_value =
1433 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1434 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1435 Register dst = locations->InAt(3).AsRegister<Register>();
1436 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1437
1438 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1439 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1440 DCHECK_EQ(char_size, 2u);
1441
1442 // Compute the number of chars (words) to move.
1443 // Save ECX, since we don't know if it will be used later.
1444 __ pushl(ECX);
1445 int stack_adjust = kX86WordSize;
1446 __ cfi().AdjustCFAOffset(stack_adjust);
1447 DCHECK_EQ(srcEnd, ECX);
1448 if (srcBegin.IsConstant()) {
1449 __ subl(ECX, Immediate(srcBegin_value));
1450 } else {
1451 DCHECK(srcBegin.IsRegister());
1452 __ subl(ECX, srcBegin.AsRegister<Register>());
1453 }
1454
1455 NearLabel done;
1456 if (mirror::kUseStringCompression) {
1457     // Location of count in string.
1458 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1459 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1460 DCHECK_EQ(c_char_size, 1u);
1461 __ pushl(EAX);
1462 __ cfi().AdjustCFAOffset(stack_adjust);
1463
1464 NearLabel copy_loop, copy_uncompressed;
1465 __ testl(Address(obj, count_offset), Immediate(1));
1466 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1467 "Expecting 0=compressed, 1=uncompressed");
1468     __ j(kNotZero, &copy_uncompressed);
1469 // Compute the address of the source string by adding the number of chars from
1470 // the source beginning to the value offset of a string.
1471 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
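    // (TIMES_1: a compressed string stores one byte per character.)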
1472
1473 // Start the loop to copy String's value to Array of Char.
1474 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1475     __ Bind(&copy_loop);
1476 __ jecxz(&done);
1477 // Use EAX temporary (convert byte from ESI to word).
1478 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1479 __ movzxb(EAX, Address(ESI, 0));
1480 __ movw(Address(EDI, 0), EAX);
1481 __ leal(EDI, Address(EDI, char_size));
1482 __ leal(ESI, Address(ESI, c_char_size));
1483 // TODO: Add support for LOOP to X86Assembler.
1484 __ subl(ECX, Immediate(1));
1485     __ jmp(&copy_loop);
1486     __ Bind(&copy_uncompressed);
1487 }
1488
1489 // Do the copy for uncompressed string.
1490 // Compute the address of the destination buffer.
1491 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1492 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1493 __ rep_movsw();
1494
1495 __ Bind(&done);
1496 if (mirror::kUseStringCompression) {
1497 // Restore EAX.
1498 __ popl(EAX);
1499 __ cfi().AdjustCFAOffset(-stack_adjust);
1500 }
1501 // Restore ECX.
1502 __ popl(ECX);
1503 __ cfi().AdjustCFAOffset(-stack_adjust);
1504 }
1505
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1506 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1507 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1508 Location out_loc = locations->Out();
1509 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1510 // to avoid a SIGBUS.
1511 switch (size) {
1512 case DataType::Type::kInt8:
1513 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1514 break;
1515 case DataType::Type::kInt16:
1516 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1517 break;
1518 case DataType::Type::kInt32:
1519 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1520 break;
1521 case DataType::Type::kInt64:
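      // The 64-bit value is read as two 32-bit loads; this peek provides no single-copy atomicity.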
1522 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1523 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1524 break;
1525 default:
1526 LOG(FATAL) << "Type not recognized for peek: " << size;
1527 UNREACHABLE();
1528 }
1529 }
1530
VisitMemoryPeekByte(HInvoke * invoke)1531 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1532 CreateLongToIntLocations(allocator_, invoke);
1533 }
1534
VisitMemoryPeekByte(HInvoke * invoke)1535 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1536 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1537 }
1538
VisitMemoryPeekIntNative(HInvoke * invoke)1539 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1540 CreateLongToIntLocations(allocator_, invoke);
1541 }
1542
VisitMemoryPeekIntNative(HInvoke * invoke)1543 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1544 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1545 }
1546
VisitMemoryPeekLongNative(HInvoke * invoke)1547 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1548 CreateLongToLongLocations(allocator_, invoke);
1549 }
1550
VisitMemoryPeekLongNative(HInvoke * invoke)1551 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1552 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1553 }
1554
VisitMemoryPeekShortNative(HInvoke * invoke)1555 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1556 CreateLongToIntLocations(allocator_, invoke);
1557 }
1558
VisitMemoryPeekShortNative(HInvoke * invoke)1559 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1560 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1561 }
1562
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1563 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1564 DataType::Type size,
1565 HInvoke* invoke) {
1566 LocationSummary* locations =
1567 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1568 locations->SetInAt(0, Location::RequiresRegister());
1569 HInstruction* value = invoke->InputAt(1);
1570 if (size == DataType::Type::kInt8) {
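    // An 8-bit store needs a byte-addressable register; on x86-32 only EAX/EBX/ECX/EDX qualify.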
1571 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1572 } else {
1573 locations->SetInAt(1, Location::RegisterOrConstant(value));
1574 }
1575 }
1576
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1577 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1578 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1579 Location value_loc = locations->InAt(1);
1580 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1581 // to avoid a SIGBUS.
1582 switch (size) {
1583 case DataType::Type::kInt8:
1584 if (value_loc.IsConstant()) {
1585 __ movb(Address(address, 0),
1586 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1587 } else {
1588 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1589 }
1590 break;
1591 case DataType::Type::kInt16:
1592 if (value_loc.IsConstant()) {
1593 __ movw(Address(address, 0),
1594 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1595 } else {
1596 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1597 }
1598 break;
1599 case DataType::Type::kInt32:
1600 if (value_loc.IsConstant()) {
1601 __ movl(Address(address, 0),
1602 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1603 } else {
1604 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1605 }
1606 break;
1607 case DataType::Type::kInt64:
1608 if (value_loc.IsConstant()) {
1609 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1610 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1611 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1612 } else {
1613 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1614 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1615 }
1616 break;
1617 default:
1618 LOG(FATAL) << "Type not recognized for poke: " << size;
1619 UNREACHABLE();
1620 }
1621 }
1622
VisitMemoryPokeByte(HInvoke * invoke)1623 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1624 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1625 }
1626
VisitMemoryPokeByte(HInvoke * invoke)1627 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1628 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1629 }
1630
VisitMemoryPokeIntNative(HInvoke * invoke)1631 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1632 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1633 }
1634
VisitMemoryPokeIntNative(HInvoke * invoke)1635 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1636 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1637 }
1638
VisitMemoryPokeLongNative(HInvoke * invoke)1639 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1640 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1641 }
1642
VisitMemoryPokeLongNative(HInvoke * invoke)1643 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1644 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1645 }
1646
VisitMemoryPokeShortNative(HInvoke * invoke)1647 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1648 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1649 }
1650
VisitMemoryPokeShortNative(HInvoke * invoke)1651 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1652 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1653 }
1654
VisitThreadCurrentThread(HInvoke * invoke)1655 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1656 LocationSummary* locations =
1657 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1658 locations->SetOut(Location::RequiresRegister());
1659 }
1660
VisitThreadCurrentThread(HInvoke * invoke)1661 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1662 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1663 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1664 }
1665
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1666 static void GenUnsafeGet(HInvoke* invoke,
1667 DataType::Type type,
1668 bool is_volatile,
1669 CodeGeneratorX86* codegen) {
1670 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1671 LocationSummary* locations = invoke->GetLocations();
1672 Location base_loc = locations->InAt(1);
1673 Register base = base_loc.AsRegister<Register>();
1674 Location offset_loc = locations->InAt(2);
1675 Register offset = offset_loc.AsRegisterPairLow<Register>();
1676 Location output_loc = locations->Out();
1677
1678 switch (type) {
1679 case DataType::Type::kInt8: {
1680 Register output = output_loc.AsRegister<Register>();
1681 __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1682 break;
1683 }
1684
1685 case DataType::Type::kInt32: {
1686 Register output = output_loc.AsRegister<Register>();
1687 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1688 break;
1689 }
1690
1691 case DataType::Type::kReference: {
1692 Register output = output_loc.AsRegister<Register>();
1693 if (codegen->EmitReadBarrier()) {
1694 if (kUseBakerReadBarrier) {
1695 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1696 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1697 invoke, output_loc, base, src, /* needs_null_check= */ false);
1698 } else {
1699 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1700 codegen->GenerateReadBarrierSlow(
1701 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1702 }
1703 } else {
1704 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1705 __ MaybeUnpoisonHeapReference(output);
1706 }
1707 break;
1708 }
1709
1710 case DataType::Type::kInt64: {
1711 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1712 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1713 if (is_volatile) {
1714 // Need to use a XMM to read atomically.
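        // Two separate 32-bit loads could observe a torn value under a concurrent write;
        // a single 8-byte SSE load of the field avoids that.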
1715 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1716 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1717 __ movd(output_lo, temp);
1718 __ psrlq(temp, Immediate(32));
1719 __ movd(output_hi, temp);
1720 } else {
1721 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1722 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1723 }
1724 }
1725 break;
1726
1727 default:
1728 LOG(FATAL) << "Unsupported op size " << type;
1729 UNREACHABLE();
1730 }
1731 }
1732
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen,DataType::Type type,bool is_volatile)1733 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1734 HInvoke* invoke,
1735 CodeGeneratorX86* codegen,
1736 DataType::Type type,
1737 bool is_volatile) {
1738 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1739 LocationSummary* locations =
1740 new (allocator) LocationSummary(invoke,
1741 can_call
1742 ? LocationSummary::kCallOnSlowPath
1743 : LocationSummary::kNoCall,
1744 kIntrinsified);
1745 if (can_call && kUseBakerReadBarrier) {
1746 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1747 }
1748 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1749 locations->SetInAt(1, Location::RequiresRegister());
1750 locations->SetInAt(2, Location::RequiresRegister());
1751 if (type == DataType::Type::kInt64) {
1752 if (is_volatile) {
1753 // Need to use XMM to read volatile.
1754 locations->AddTemp(Location::RequiresFpuRegister());
1755 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1756 } else {
1757 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1758 }
1759 } else {
1760 locations->SetOut(Location::RequiresRegister(),
1761 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1762 }
1763 }
1764
VisitUnsafeGet(HInvoke * invoke)1765 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1766 VisitJdkUnsafeGet(invoke);
1767 }
VisitUnsafeGetVolatile(HInvoke * invoke)1768 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1769 VisitJdkUnsafeGetVolatile(invoke);
1770 }
VisitUnsafeGetLong(HInvoke * invoke)1771 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1772 VisitJdkUnsafeGetLong(invoke);
1773 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1774 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1775 VisitJdkUnsafeGetLongVolatile(invoke);
1776 }
VisitUnsafeGetObject(HInvoke * invoke)1777 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1778 VisitJdkUnsafeGetReference(invoke);
1779 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1780 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1781 VisitJdkUnsafeGetReferenceVolatile(invoke);
1782 }
VisitUnsafeGetByte(HInvoke * invoke)1783 void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) {
1784 VisitJdkUnsafeGetByte(invoke);
1785 }
1786
VisitUnsafeGet(HInvoke * invoke)1787 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1788 VisitJdkUnsafeGet(invoke);
1789 }
VisitUnsafeGetVolatile(HInvoke * invoke)1790 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1791 VisitJdkUnsafeGetVolatile(invoke);
1792 }
VisitUnsafeGetLong(HInvoke * invoke)1793 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1794 VisitJdkUnsafeGetLong(invoke);
1795 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1796 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1797 VisitJdkUnsafeGetLongVolatile(invoke);
1798 }
VisitUnsafeGetObject(HInvoke * invoke)1799 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1800 VisitJdkUnsafeGetReference(invoke);
1801 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1802 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1803 VisitJdkUnsafeGetReferenceVolatile(invoke);
1804 }
VisitUnsafeGetByte(HInvoke * invoke)1805 void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) {
1806 VisitJdkUnsafeGetByte(invoke);
1807 }
1808
VisitJdkUnsafeGet(HInvoke * invoke)1809 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1810 CreateIntIntIntToIntLocations(
1811 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false);
1812 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1813 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1814 CreateIntIntIntToIntLocations(
1815 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1816 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1817 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1818 CreateIntIntIntToIntLocations(
1819 allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
1820 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1821 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1822 CreateIntIntIntToIntLocations(
1823 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false);
1824 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1825 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1826 CreateIntIntIntToIntLocations(
1827 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1828 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1829 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1830 CreateIntIntIntToIntLocations(
1831 allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
1832 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1833 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1834 CreateIntIntIntToIntLocations(
1835 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false);
1836 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1837 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1838 CreateIntIntIntToIntLocations(
1839 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1840 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1841 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1842 CreateIntIntIntToIntLocations(
1843 allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
1844 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1845 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1846 CreateIntIntIntToIntLocations(
1847 allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false);
1848 }
1849
VisitJdkUnsafeGet(HInvoke * invoke)1850 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1851 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1852 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1853 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1854 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1855 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1856 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1857 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1858 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1859 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1860 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1861 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1862 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1863 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1864 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1865 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1866 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1867 }
VisitJdkUnsafeGetReference(HInvoke * invoke)1868 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
1869 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1870 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)1871 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
1872 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1873 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)1874 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
1875 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1876 }
VisitJdkUnsafeGetByte(HInvoke * invoke)1877 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
1878 GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
1879 }
1880
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1881 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1882 DataType::Type type,
1883 HInvoke* invoke,
1884 bool is_volatile) {
1885 LocationSummary* locations =
1886 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1887 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1888 locations->SetInAt(1, Location::RequiresRegister());
1889 locations->SetInAt(2, Location::RequiresRegister());
1890 locations->SetInAt(3, Location::RequiresRegister());
1891 if (type == DataType::Type::kReference) {
1892 // Need temp registers for card-marking.
1893 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1894 // Ensure the value is in a byte register.
1895 locations->AddTemp(Location::RegisterLocation(ECX));
1896 } else if (type == DataType::Type::kInt64 && is_volatile) {
1897 locations->AddTemp(Location::RequiresFpuRegister());
1898 locations->AddTemp(Location::RequiresFpuRegister());
1899 }
1900 }
1901
VisitUnsafePut(HInvoke * invoke)1902 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1903 VisitJdkUnsafePut(invoke);
1904 }
VisitUnsafePutOrdered(HInvoke * invoke)1905 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1906 VisitJdkUnsafePutOrdered(invoke);
1907 }
VisitUnsafePutVolatile(HInvoke * invoke)1908 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1909 VisitJdkUnsafePutVolatile(invoke);
1910 }
VisitUnsafePutObject(HInvoke * invoke)1911 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1912 VisitJdkUnsafePutReference(invoke);
1913 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1914 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1915 VisitJdkUnsafePutObjectOrdered(invoke);
1916 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1917 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1918 VisitJdkUnsafePutReferenceVolatile(invoke);
1919 }
VisitUnsafePutLong(HInvoke * invoke)1920 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1921 VisitJdkUnsafePutLong(invoke);
1922 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1923 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1924 VisitJdkUnsafePutLongOrdered(invoke);
1925 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1926 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1927 VisitJdkUnsafePutLongVolatile(invoke);
1928 }
VisitUnsafePutByte(HInvoke * invoke)1929 void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) {
1930 VisitJdkUnsafePutByte(invoke);
1931 }
1932
VisitJdkUnsafePut(HInvoke * invoke)1933 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
1934 CreateIntIntIntIntToVoidPlusTempsLocations(
1935 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1936 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)1937 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1938 CreateIntIntIntIntToVoidPlusTempsLocations(
1939 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1940 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)1941 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1942 CreateIntIntIntIntToVoidPlusTempsLocations(
1943 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1944 }
VisitJdkUnsafePutRelease(HInvoke * invoke)1945 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1946 CreateIntIntIntIntToVoidPlusTempsLocations(
1947 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1948 }
VisitJdkUnsafePutReference(HInvoke * invoke)1949 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
1950 CreateIntIntIntIntToVoidPlusTempsLocations(
1951 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1952 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1954 CreateIntIntIntIntToVoidPlusTempsLocations(
1955 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1956 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)1957 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
1958 CreateIntIntIntIntToVoidPlusTempsLocations(
1959 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1960 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)1961 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
1962 CreateIntIntIntIntToVoidPlusTempsLocations(
1963 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1964 }
VisitJdkUnsafePutLong(HInvoke * invoke)1965 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
1966 CreateIntIntIntIntToVoidPlusTempsLocations(
1967 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1968 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)1969 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1970 CreateIntIntIntIntToVoidPlusTempsLocations(
1971 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1972 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)1973 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1974 CreateIntIntIntIntToVoidPlusTempsLocations(
1975 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1976 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)1977 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1978 CreateIntIntIntIntToVoidPlusTempsLocations(
1979 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1980 }
VisitJdkUnsafePutByte(HInvoke * invoke)1981 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
1982 CreateIntIntIntIntToVoidPlusTempsLocations(
1983 allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false);
1984 }
1985
1986 // Ordered writes need no special handling here: they only require an AnyStore barrier,
1987 // which the x86 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1988 static void GenUnsafePut(LocationSummary* locations,
1989 DataType::Type type,
1990 bool is_volatile,
1991 CodeGeneratorX86* codegen) {
1992 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1993 Register base = locations->InAt(1).AsRegister<Register>();
1994 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1995 Location value_loc = locations->InAt(3);
1996
1997 if (type == DataType::Type::kInt64) {
1998 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1999 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2000 if (is_volatile) {
2001 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2002 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
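      // Pack the two halves into one XMM register (low dword = value_lo, next dword = value_hi)
      // and store them with a single 8-byte movsd so the volatile write cannot be torn.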
2003 __ movd(temp1, value_lo);
2004 __ movd(temp2, value_hi);
2005 __ punpckldq(temp1, temp2);
2006 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2007 } else {
2008 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2009 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2010 }
2011 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2012 Register temp = locations->GetTemp(0).AsRegister<Register>();
2013 __ movl(temp, value_loc.AsRegister<Register>());
2014 __ PoisonHeapReference(temp);
2015 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2016 } else {
2017 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2018 }
2019
2020 if (is_volatile) {
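    // A volatile store requires a trailing StoreLoad barrier; that is the only ordering the
    // x86 memory model does not already provide for plain stores.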
2021 codegen->MemoryFence();
2022 }
2023
2024 if (type == DataType::Type::kReference) {
2025 bool value_can_be_null = true; // TODO: Worth finding out this information?
2026 codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2027 locations->GetTemp(1).AsRegister<Register>(),
2028 base,
2029 value_loc.AsRegister<Register>(),
2030 value_can_be_null);
2031 }
2032 }
2033
VisitUnsafePut(HInvoke * invoke)2034 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2035 VisitJdkUnsafePut(invoke);
2036 }
VisitUnsafePutOrdered(HInvoke * invoke)2037 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2038 VisitJdkUnsafePutOrdered(invoke);
2039 }
VisitUnsafePutVolatile(HInvoke * invoke)2040 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2041 VisitJdkUnsafePutVolatile(invoke);
2042 }
VisitUnsafePutObject(HInvoke * invoke)2043 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2044 VisitJdkUnsafePutReference(invoke);
2045 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2046 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2047 VisitJdkUnsafePutObjectOrdered(invoke);
2048 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2049 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2050 VisitJdkUnsafePutReferenceVolatile(invoke);
2051 }
VisitUnsafePutLong(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2053 VisitJdkUnsafePutLong(invoke);
2054 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2056 VisitJdkUnsafePutLongOrdered(invoke);
2057 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2059 VisitJdkUnsafePutLongVolatile(invoke);
2060 }
VisitUnsafePutByte(HInvoke * invoke)2061 void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) {
2062 VisitJdkUnsafePutByte(invoke);
2063 }
2064
VisitJdkUnsafePut(HInvoke * invoke)2065 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2066 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2067 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2068 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2069 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2070 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2071 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2072 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2073 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2074 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2075 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2076 }
VisitJdkUnsafePutReference(HInvoke * invoke)2077 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
2078 GenUnsafePut(
2079 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2080 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2081 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2082 GenUnsafePut(
2083 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2084 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2085 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2086 GenUnsafePut(
2087 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2088 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2089 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2090 GenUnsafePut(
2091 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2092 }
VisitJdkUnsafePutLong(HInvoke * invoke)2093 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2094 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2095 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2096 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2097 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2098 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2099 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2100 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2101 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2102 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2103 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2104 }
VisitJdkUnsafePutByte(HInvoke * invoke)2105 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
2106 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
2107 }
2108
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,DataType::Type type,HInvoke * invoke)2109 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2110 CodeGeneratorX86* codegen,
2111 DataType::Type type,
2112 HInvoke* invoke) {
2113 const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2114 LocationSummary* locations =
2115 new (allocator) LocationSummary(invoke,
2116 can_call
2117 ? LocationSummary::kCallOnSlowPath
2118 : LocationSummary::kNoCall,
2119 kIntrinsified);
2120 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2121 locations->SetInAt(1, Location::RequiresRegister());
2122   // Offset is a long, but in 32-bit mode we only need the low 32 bits.
2123 // Can we update the invoke here to remove a TypeConvert to Long?
2124 locations->SetInAt(2, Location::RequiresRegister());
2125 // Expected value must be in EAX or EDX:EAX.
2126 // For long, new value must be in ECX:EBX.
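  // These are the fixed register operands of CMPXCHG (EAX) and CMPXCHG8B (EDX:EAX old, ECX:EBX new).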
2127 if (type == DataType::Type::kInt64) {
2128 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2129 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2130 } else {
2131 locations->SetInAt(3, Location::RegisterLocation(EAX));
2132 locations->SetInAt(4, Location::RequiresRegister());
2133 }
2134
2135 // Force a byte register for the output.
2136 locations->SetOut(Location::RegisterLocation(EAX));
2137 if (type == DataType::Type::kReference) {
2138 // Need temporary registers for card-marking, and possibly for
2139 // (Baker) read barrier.
2140 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2141 // Need a byte register for marking.
2142 locations->AddTemp(Location::RegisterLocation(ECX));
2143 }
2144 }
2145
VisitUnsafeCASInt(HInvoke * invoke)2146 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2147 VisitJdkUnsafeCASInt(invoke);
2148 }
2149
VisitUnsafeCASLong(HInvoke * invoke)2150 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2151 VisitJdkUnsafeCASLong(invoke);
2152 }
2153
VisitUnsafeCASObject(HInvoke * invoke)2154 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2155 VisitJdkUnsafeCASObject(invoke);
2156 }
2157
VisitJdkUnsafeCASInt(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2159 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2160 VisitJdkUnsafeCompareAndSetInt(invoke);
2161 }
2162
VisitJdkUnsafeCASLong(HInvoke * invoke)2163 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2164 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2165 VisitJdkUnsafeCompareAndSetLong(invoke);
2166 }
2167
VisitJdkUnsafeCASObject(HInvoke * invoke)2168 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2169 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2170 VisitJdkUnsafeCompareAndSetReference(invoke);
2171 }
2172
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2173 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2174 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke);
2175 }
2176
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2177 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2178 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke);
2179 }
2180
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2181 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2182 // The only supported read barrier implementation is the Baker-style read barriers.
2183 if (codegen_->EmitNonBakerReadBarrier()) {
2184 return;
2185 }
2186
2187 CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke);
2188 }
2189
GenPrimitiveLockedCmpxchg(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp=Register::kNoRegister)2190 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2191 CodeGeneratorX86* codegen,
2192 Location expected_value,
2193 Location new_value,
2194 Register base,
2195 Register offset,
2196 // Only necessary for floating point
2197 Register temp = Register::kNoRegister) {
2198 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2199
2200 if (DataType::Kind(type) == DataType::Type::kInt32) {
2201 DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2202 }
2203
2204 // The address of the field within the holding object.
2205 Address field_addr(base, offset, TIMES_1, 0);
2206
2207 switch (type) {
2208 case DataType::Type::kBool:
2209 case DataType::Type::kInt8:
2210 __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2211 break;
2212 case DataType::Type::kInt16:
2213 case DataType::Type::kUint16:
2214 __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2215 break;
2216 case DataType::Type::kInt32:
2217 __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2218 break;
2219 case DataType::Type::kFloat32: {
2220 // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2221 DCHECK_NE(temp, EAX);
2222 // EAX is both an input and an output for cmpxchg
2223 codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2224 codegen->Move32(Location::RegisterLocation(temp), new_value);
2225 __ LockCmpxchgl(field_addr, temp);
2226 break;
2227 }
2228 case DataType::Type::kInt64:
2229 // Ensure the expected value is in EAX:EDX and that the new
2230 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2231 DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2232 DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2233 DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2234 DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2235 __ LockCmpxchg8b(field_addr);
2236 break;
2237 default:
2238 LOG(FATAL) << "Unexpected CAS type " << type;
2239 }
2240 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2241 // don't need scheduling barriers at this time.
2242 }
2243
GenPrimitiveCAS(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Location out,Register temp=Register::kNoRegister,bool is_cmpxchg=false)2244 static void GenPrimitiveCAS(DataType::Type type,
2245 CodeGeneratorX86* codegen,
2246 Location expected_value,
2247 Location new_value,
2248 Register base,
2249 Register offset,
2250 Location out,
2251 // Only necessary for floating point
2252 Register temp = Register::kNoRegister,
2253 bool is_cmpxchg = false) {
2254 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2255
2256 if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2257 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2258 }
2259
2260 GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2261
2262 if (is_cmpxchg) {
2263 // Sign-extend, zero-extend or move the result if necessary
2264 switch (type) {
2265 case DataType::Type::kBool:
2266 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2267 break;
2268 case DataType::Type::kInt8:
2269 __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2270 break;
2271 case DataType::Type::kInt16:
2272 __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2273 break;
2274 case DataType::Type::kUint16:
2275 __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2276 break;
2277 case DataType::Type::kFloat32:
2278 __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2279 break;
2280 default:
2281 // Nothing to do
2282 break;
2283 }
2284 } else {
2285 // Convert ZF into the Boolean result.
2286 __ setb(kZero, out.AsRegister<Register>());
2287 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2288 }
2289 }
2290
GenReferenceCAS(HInvoke * invoke,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp,Register temp2,bool is_cmpxchg=false)2291 static void GenReferenceCAS(HInvoke* invoke,
2292 CodeGeneratorX86* codegen,
2293 Location expected_value,
2294 Location new_value,
2295 Register base,
2296 Register offset,
2297 Register temp,
2298 Register temp2,
2299 bool is_cmpxchg = false) {
2300 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2301 LocationSummary* locations = invoke->GetLocations();
2302 Location out = locations->Out();
2303
2304 // The address of the field within the holding object.
2305 Address field_addr(base, offset, TIMES_1, 0);
2306
2307 Register value = new_value.AsRegister<Register>();
2308 Register expected = expected_value.AsRegister<Register>();
2309 DCHECK_EQ(expected, EAX);
2310 DCHECK_NE(temp, temp2);
2311
2312 if (codegen->EmitBakerReadBarrier()) {
2313 // Need to make sure the reference stored in the field is a to-space
2314 // one before attempting the CAS or the CAS could fail incorrectly.
2315 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2316 invoke,
2317         // Not used as an actual output; serves only as a "temporary" within the read barrier.
2318 Location::RegisterLocation(temp),
2319 base,
2320 field_addr,
2321 /* needs_null_check= */ false,
2322 /* always_update_field= */ true,
2323 &temp2);
2324 }
2325 bool base_equals_value = (base == value);
2326 if (kPoisonHeapReferences) {
2327 if (base_equals_value) {
2328 // If `base` and `value` are the same register location, move
2329 // `value` to a temporary register. This way, poisoning
2330 // `value` won't invalidate `base`.
2331 value = temp;
2332 __ movl(value, base);
2333 }
2334
2335 // Check that the register allocator did not assign the location
2336 // of `expected` (EAX) to `value` nor to `base`, so that heap
2337 // poisoning (when enabled) works as intended below.
2338 // - If `value` were equal to `expected`, both references would
2339 // be poisoned twice, meaning they would not be poisoned at
2340 // all, as heap poisoning uses address negation.
2341 // - If `base` were equal to `expected`, poisoning `expected`
2342 // would invalidate `base`.
2343 DCHECK_NE(value, expected);
2344 DCHECK_NE(base, expected);
2345 __ PoisonHeapReference(expected);
2346 __ PoisonHeapReference(value);
2347 }
2348 __ LockCmpxchgl(field_addr, value);
2349
2350 // LOCK CMPXCHG has full barrier semantics, and we don't need
2351 // scheduling barriers at this time.
2352
2353 if (is_cmpxchg) {
2354 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2355 __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2356 } else {
2357 // Convert ZF into the Boolean result.
2358 __ setb(kZero, out.AsRegister<Register>());
2359 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2360 }
2361
2362 // Mark card for object if the new value is stored.
2363 bool value_can_be_null = true; // TODO: Worth finding out this information?
2364 NearLabel skip_mark_gc_card;
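  // The branch below relies on ZF from LOCK CMPXCHG still being live (SETB and MOVZX
  // do not modify flags): a failed CAS skips the card mark.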
2365 __ j(kNotZero, &skip_mark_gc_card);
2366 codegen->MaybeMarkGCCard(temp, temp2, base, value, value_can_be_null);
2367 __ Bind(&skip_mark_gc_card);
2368
2369 // If heap poisoning is enabled, we need to unpoison the values
2370 // that were poisoned earlier.
2371 if (kPoisonHeapReferences) {
2372 if (base_equals_value) {
2373 // `value` has been moved to a temporary register, no need to
2374 // unpoison it.
2375 } else {
2376 // Ensure `value` is different from `out`, so that unpoisoning
2377 // the former does not invalidate the latter.
2378 DCHECK_NE(value, out.AsRegister<Register>());
2379 __ UnpoisonHeapReference(value);
2380 }
2381 }
2382 // Do not unpoison the reference contained in register
2383 // `expected`, as it is the same as register `out` (EAX).
2384 }
2385
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2386 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2387 LocationSummary* locations = invoke->GetLocations();
2388
2389 Register base = locations->InAt(1).AsRegister<Register>();
2390 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2391 Location expected_value = locations->InAt(3);
2392 Location new_value = locations->InAt(4);
2393 Location out = locations->Out();
2394 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2395
2396 if (type == DataType::Type::kReference) {
2397 // The only read barrier implementation supporting the
2398 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2399 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2400
2401 Register temp = locations->GetTemp(0).AsRegister<Register>();
2402 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2403 GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2404 } else {
2405 DCHECK(!DataType::IsFloatingPointType(type));
2406 GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2407 }
2408 }
2409
VisitUnsafeCASInt(HInvoke * invoke)2410 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2411 VisitJdkUnsafeCASInt(invoke);
2412 }
2413
VisitUnsafeCASLong(HInvoke * invoke)2414 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2415 VisitJdkUnsafeCASLong(invoke);
2416 }
2417
VisitUnsafeCASObject(HInvoke * invoke)2418 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2419 // The only read barrier implementation supporting the
2420 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2421 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2422
2423 GenCAS(DataType::Type::kReference, invoke, codegen_);
2424 }
2425
VisitJdkUnsafeCASInt(HInvoke * invoke)2426 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2427 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2428 VisitJdkUnsafeCompareAndSetInt(invoke);
2429 }
2430
VisitJdkUnsafeCASLong(HInvoke * invoke)2431 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2432 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2433 VisitJdkUnsafeCompareAndSetLong(invoke);
2434 }
2435
VisitJdkUnsafeCASObject(HInvoke * invoke)2436 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2437 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2438 VisitJdkUnsafeCompareAndSetReference(invoke);
2439 }
2440
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2441 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2442 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2443 }
2444
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2445 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2446 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2447 }
2448
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2449 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2450 // The only supported read barrier implementation is the Baker-style read barriers.
2451 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2452
2453 GenCAS(DataType::Type::kReference, invoke, codegen_);
2454 }
2455
2456 // Note: Unlike other architectures, which use a corresponding enum for the `VarHandle`
2457 // implementation as well, x86 currently uses this enum only for `Unsafe`.
2458 enum class GetAndUpdateOp {
2459 kSet,
2460 kAdd,
2461 };
2462
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen,DataType::Type type,GetAndUpdateOp get_and_unsafe_op)2463 void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2464 HInvoke* invoke,
2465 CodeGeneratorX86* codegen,
2466 DataType::Type type,
2467 GetAndUpdateOp get_and_unsafe_op) {
2468 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2469 LocationSummary* locations =
2470 new (allocator) LocationSummary(invoke,
2471 can_call
2472 ? LocationSummary::kCallOnSlowPath
2473 : LocationSummary::kNoCall,
2474 kIntrinsified);
2475 if (can_call && kUseBakerReadBarrier) {
2476 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2477 }
2478 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2479 if (type == DataType::Type::kInt64) {
2480 // Explicitly allocate all registers.
2481 locations->SetInAt(1, Location::RegisterLocation(EBP));
2482 if (get_and_unsafe_op == GetAndUpdateOp::kAdd) {
2483 locations->AddTemp(Location::RegisterLocation(EBP)); // We shall clobber EBP.
2484 locations->SetInAt(2, Location::Any()); // Offset shall be on the stack.
2485 locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI));
2486 locations->AddTemp(Location::RegisterLocation(EBX));
2487 locations->AddTemp(Location::RegisterLocation(ECX));
2488 } else {
2489 locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
2490 locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
2491 }
2492 locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
2493 } else {
2494 locations->SetInAt(1, Location::RequiresRegister());
2495 locations->SetInAt(2, Location::RequiresRegister());
2496 // Use the same register for both the output and the new value or addend
2497 // to take advantage of XCHG or XADD. Arbitrarily pick EAX.
2498 locations->SetInAt(3, Location::RegisterLocation(EAX));
2499 locations->SetOut(Location::RegisterLocation(EAX));
2500 }
2501 }
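// Note on the 64-bit register assignment above: GenUnsafeGetAndUpdate() implements the
// 64-bit case with LOCK CMPXCHG8B, which implicitly compares against EDX:EAX and stores
// ECX:EBX on success. With EAX:EDX also holding the output, that leaves only ESI:EDI for
// the addend in the kAdd case, so the offset has to stay on the stack there and is added
// directly into the (clobbered) base register; for kSet the new value goes in EBX:ECX and
// the offset can use ESI:EDI.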
2502
VisitUnsafeGetAndAddInt(HInvoke * invoke)2503 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2504 VisitJdkUnsafeGetAndAddInt(invoke);
2505 }
2506
VisitUnsafeGetAndAddLong(HInvoke * invoke)2507 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2508 VisitJdkUnsafeGetAndAddLong(invoke);
2509 }
2510
VisitUnsafeGetAndSetInt(HInvoke * invoke)2511 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2512 VisitJdkUnsafeGetAndSetInt(invoke);
2513 }
2514
VisitUnsafeGetAndSetLong(HInvoke * invoke)2515 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2516 VisitJdkUnsafeGetAndSetLong(invoke);
2517 }
2518
VisitUnsafeGetAndSetObject(HInvoke * invoke)2519 void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2520 VisitJdkUnsafeGetAndSetReference(invoke);
2521 }
2522
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2523 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2524 CreateUnsafeGetAndUpdateLocations(
2525 allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
2526 }
2527
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2528 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2529 CreateUnsafeGetAndUpdateLocations(
2530 allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
2531 }
2532
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2533 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2534 CreateUnsafeGetAndUpdateLocations(
2535 allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
2536 }
2537
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2538 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2539 CreateUnsafeGetAndUpdateLocations(
2540 allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
2541 }
2542
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2543 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2544 // The only supported read barrier implementation is the Baker-style read barriers.
2545 if (codegen_->EmitNonBakerReadBarrier()) {
2546 return;
2547 }
2548
2549 CreateUnsafeGetAndUpdateLocations(
2550 allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
2551 LocationSummary* locations = invoke->GetLocations();
2552 locations->AddTemp(Location::RequiresRegister());
2553 locations->AddTemp(Location::RegisterLocation(ECX)); // Byte register for `MarkGCCard()`.
2554 }
2555
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorX86 * codegen,GetAndUpdateOp get_and_update_op)2556 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2557 DataType::Type type,
2558 CodeGeneratorX86* codegen,
2559 GetAndUpdateOp get_and_update_op) {
2560 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2561 LocationSummary* locations = invoke->GetLocations();
2562
2563 Location out = locations->Out(); // Result.
2564 Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
2565 Location offset = locations->InAt(2); // Long offset.
2566 Location arg = locations->InAt(3); // New value or addend.
2567
2568 if (type == DataType::Type::kInt32) {
2569 DCHECK(out.Equals(arg));
2570 Register out_reg = out.AsRegister<Register>();
2571 Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2572 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2573 __ LockXaddl(field_address, out_reg);
2574 } else {
2575 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2576 __ xchgl(out_reg, field_address);
2577 }
2578 } else if (type == DataType::Type::kInt64) {
2579 // Prepare the field address. Ignore the high 32 bits of the `offset`.
2580 Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0);
2581 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2582 DCHECK(offset.IsDoubleStackSlot());
2583 __ addl(base, Address(ESP, offset.GetStackIndex())); // Clobbers `base`.
2584 DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0)));
2585 field_address_low = Address(base, 0);
2586 field_address_high = Address(base, 4);
2587 } else {
2588 field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2589 field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4);
2590 }
2591 // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value.
2592 NearLabel loop;
2593 __ Bind(&loop);
2594 __ movl(EAX, field_address_low);
2595 __ movl(EDX, field_address_high);
2596 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2597 DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg));
2598 __ movl(EBX, EAX);
2599 __ movl(ECX, EDX);
2600 __ addl(EBX, ESI);
2601 __ adcl(ECX, EDI);
2602 } else {
2603 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2604 DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg));
2605 }
2606 __ LockCmpxchg8b(field_address_low);
2607 __ j(kNotEqual, &loop); // Repeat on failure.
2608 } else {
2609 DCHECK_EQ(type, DataType::Type::kReference);
2610 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2611 Register out_reg = out.AsRegister<Register>();
2612 Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
2613 Register temp1 = locations->GetTemp(0).AsRegister<Register>();
2614 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2615
2616 if (codegen->EmitReadBarrier()) {
2617 DCHECK(kUseBakerReadBarrier);
2618 // Ensure that the field contains a to-space reference.
2619 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2620 invoke,
2621 Location::RegisterLocation(temp2),
2622 base,
2623 field_address,
2624 /*needs_null_check=*/ false,
2625 /*always_update_field=*/ true,
2626 &temp1);
2627 }
2628
2629 // Mark the card for the object, as a new value is about to be stored.
2630 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2631 DCHECK_EQ(temp2, ECX); // Byte register for `MarkGCCard()`.
2632 codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_reg, new_value_can_be_null);
2633
2634 if (kPoisonHeapReferences) {
2635 // Use a temp to avoid poisoning base of the field address, which might happen if `out`
2636 // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2637 __ movl(temp1, out_reg);
2638 __ PoisonHeapReference(temp1);
2639 __ xchgl(temp1, field_address);
2640 __ UnpoisonHeapReference(temp1);
2641 __ movl(out_reg, temp1);
2642 } else {
2643 __ xchgl(out_reg, field_address);
2644 }
2645 }
2646 }
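// For reference, the Java-level contract implemented above is roughly the following
// (a sketch of the Unsafe semantics, not the actual libcore source):
//
//   long getAndAddLong(Object o, long offset, long delta) {
//     long v;
//     do {
//       v = getLongVolatile(o, offset);
//     } while (!compareAndSwapLong(o, offset, v, v + delta));
//     return v;
//   }
//
// The 32-bit and reference cases above avoid the loop by using LOCK XADD / XCHG directly.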
2647
VisitUnsafeGetAndAddInt(HInvoke * invoke)2648 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2649 VisitJdkUnsafeGetAndAddInt(invoke);
2650 }
2651
VisitUnsafeGetAndAddLong(HInvoke * invoke)2652 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2653 VisitJdkUnsafeGetAndAddLong(invoke);
2654 }
2655
VisitUnsafeGetAndSetInt(HInvoke * invoke)2656 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2657 VisitJdkUnsafeGetAndSetInt(invoke);
2658 }
2659
VisitUnsafeGetAndSetLong(HInvoke * invoke)2660 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2661 VisitJdkUnsafeGetAndSetLong(invoke);
2662 }
2663
VisitUnsafeGetAndSetObject(HInvoke * invoke)2664 void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2665 VisitJdkUnsafeGetAndSetReference(invoke);
2666 }
2667
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2668 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2669 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2670 }
2671
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2672 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2673 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2674 }
2675
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2676 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2677 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2678 }
2679
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2680 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2681 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2682 }
2683
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2684 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2685 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2686 }
2687
VisitIntegerReverse(HInvoke * invoke)2688 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2689 LocationSummary* locations =
2690 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2691 locations->SetInAt(0, Location::RequiresRegister());
2692 locations->SetOut(Location::SameAsFirstInput());
2693 locations->AddTemp(Location::RequiresRegister());
2694 }
2695
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2696 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2697 X86Assembler* assembler) {
2698 Immediate imm_shift(shift);
2699 Immediate imm_mask(mask);
2700 __ movl(temp, reg);
2701 __ shrl(reg, imm_shift);
2702 __ andl(temp, imm_mask);
2703 __ andl(reg, imm_mask);
2704 __ shll(temp, imm_shift);
2705 __ orl(reg, temp);
2706 }
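// In C-like terms, SwapBits(reg, temp, shift, mask, assembler) emits code computing
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
// e.g. shift == 1 with mask == 0x55555555 swaps every pair of adjacent bits.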
2707
VisitIntegerReverse(HInvoke * invoke)2708 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2709 X86Assembler* assembler = GetAssembler();
2710 LocationSummary* locations = invoke->GetLocations();
2711
2712 Register reg = locations->InAt(0).AsRegister<Register>();
2713 Register temp = locations->GetTemp(0).AsRegister<Register>();
2714
2715 /*
2716 * Use one bswap instruction to reverse the byte order first, and then use 3 rounds of
2717 * bit swapping to reverse the bits of a number x. Using bswap saves instructions
2718 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2719 * x = bswap x
2720 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2721 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2722 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2723 */
2724 __ bswapl(reg);
2725 SwapBits(reg, temp, 1, 0x55555555, assembler);
2726 SwapBits(reg, temp, 2, 0x33333333, assembler);
2727 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2728 }
2729
VisitLongReverse(HInvoke * invoke)2730 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2731 LocationSummary* locations =
2732 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2733 locations->SetInAt(0, Location::RequiresRegister());
2734 locations->SetOut(Location::SameAsFirstInput());
2735 locations->AddTemp(Location::RequiresRegister());
2736 }
2737
VisitLongReverse(HInvoke * invoke)2738 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2739 X86Assembler* assembler = GetAssembler();
2740 LocationSummary* locations = invoke->GetLocations();
2741
2742 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2743 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2744 Register temp = locations->GetTemp(0).AsRegister<Register>();
2745
2746 // We want to swap the high and low words, then bswap each one, and then do the same
2747 // bit swapping as in the 32-bit reverse.
2748 // Exchange high and low.
2749 __ movl(temp, reg_low);
2750 __ movl(reg_low, reg_high);
2751 __ movl(reg_high, temp);
2752
2753 // bit-reverse low
2754 __ bswapl(reg_low);
2755 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2756 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2757 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2758
2759 // bit-reverse high
2760 __ bswapl(reg_high);
2761 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2762 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2763 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2764 }
2765
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2766 static void CreateBitCountLocations(
2767 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2768 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2769 // Do nothing if there is no popcnt support. This results in generating
2770 // a call for the intrinsic rather than direct code.
2771 return;
2772 }
2773 LocationSummary* locations =
2774 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2775 if (is_long) {
2776 locations->AddTemp(Location::RequiresRegister());
2777 }
2778 locations->SetInAt(0, Location::Any());
2779 locations->SetOut(Location::RequiresRegister());
2780 }
2781
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2782 static void GenBitCount(X86Assembler* assembler,
2783 CodeGeneratorX86* codegen,
2784 HInvoke* invoke, bool is_long) {
2785 LocationSummary* locations = invoke->GetLocations();
2786 Location src = locations->InAt(0);
2787 Register out = locations->Out().AsRegister<Register>();
2788
2789 if (invoke->InputAt(0)->IsConstant()) {
2790 // Evaluate this at compile time.
2791 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2792 int32_t result = is_long
2793 ? POPCOUNT(static_cast<uint64_t>(value))
2794 : POPCOUNT(static_cast<uint32_t>(value));
2795 codegen->Load32BitValue(out, result);
2796 return;
2797 }
2798
2799 // Handle the non-constant cases.
2800 if (!is_long) {
2801 if (src.IsRegister()) {
2802 __ popcntl(out, src.AsRegister<Register>());
2803 } else {
2804 DCHECK(src.IsStackSlot());
2805 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2806 }
2807 } else {
2808 // The 64-bit case needs to worry about two parts.
2809 Register temp = locations->GetTemp(0).AsRegister<Register>();
2810 if (src.IsRegisterPair()) {
2811 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2812 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2813 } else {
2814 DCHECK(src.IsDoubleStackSlot());
2815 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2816 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2817 }
2818 __ addl(out, temp);
2819 }
2820 }
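// The 64-bit case above relies on popcount(x) == popcount(low32(x)) + popcount(high32(x)),
// which is exactly what the two POPCNT instructions followed by the ADD compute.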
2821
VisitIntegerBitCount(HInvoke * invoke)2822 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2823 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2824 }
2825
VisitIntegerBitCount(HInvoke * invoke)2826 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2827 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2828 }
2829
VisitLongBitCount(HInvoke * invoke)2830 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2831 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2832 }
2833
VisitLongBitCount(HInvoke * invoke)2834 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2835 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2836 }
2837
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2838 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2839 LocationSummary* locations =
2840 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2841 if (is_long) {
2842 locations->SetInAt(0, Location::RequiresRegister());
2843 } else {
2844 locations->SetInAt(0, Location::Any());
2845 }
2846 locations->SetOut(Location::RequiresRegister());
2847 }
2848
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2849 static void GenLeadingZeros(X86Assembler* assembler,
2850 CodeGeneratorX86* codegen,
2851 HInvoke* invoke, bool is_long) {
2852 LocationSummary* locations = invoke->GetLocations();
2853 Location src = locations->InAt(0);
2854 Register out = locations->Out().AsRegister<Register>();
2855
2856 if (invoke->InputAt(0)->IsConstant()) {
2857 // Evaluate this at compile time.
2858 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2859 if (value == 0) {
2860 value = is_long ? 64 : 32;
2861 } else {
2862 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2863 }
2864 codegen->Load32BitValue(out, value);
2865 return;
2866 }
2867
2868 // Handle the non-constant cases.
2869 if (!is_long) {
2870 if (src.IsRegister()) {
2871 __ bsrl(out, src.AsRegister<Register>());
2872 } else {
2873 DCHECK(src.IsStackSlot());
2874 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2875 }
2876
2877 // BSR sets ZF if the input was zero, and the output is undefined.
2878 NearLabel all_zeroes, done;
2879 __ j(kEqual, &all_zeroes);
2880
2881 // Correct the result from BSR to get the final CLZ result.
2882 __ xorl(out, Immediate(31));
2883 __ jmp(&done);
2884
2885 // Fix the zero case with the expected result.
2886 __ Bind(&all_zeroes);
2887 __ movl(out, Immediate(32));
2888
2889 __ Bind(&done);
2890 return;
2891 }
2892
2893 // The 64-bit case needs to worry about both parts of the register pair.
2894 DCHECK(src.IsRegisterPair());
2895 Register src_lo = src.AsRegisterPairLow<Register>();
2896 Register src_hi = src.AsRegisterPairHigh<Register>();
2897 NearLabel handle_low, done, all_zeroes;
2898
2899 // Is the high word zero?
2900 __ testl(src_hi, src_hi);
2901 __ j(kEqual, &handle_low);
2902
2903 // High word is not zero. We know that the BSR result is defined in this case.
2904 __ bsrl(out, src_hi);
2905
2906 // Correct the result from BSR to get the final CLZ result.
2907 __ xorl(out, Immediate(31));
2908 __ jmp(&done);
2909
2910 // High word was zero. We have to compute the low word count and add 32.
2911 __ Bind(&handle_low);
2912 __ bsrl(out, src_lo);
2913 __ j(kEqual, &all_zeroes);
2914
2915 // We had a valid result. Use an XOR to both correct the result and add 32.
2916 __ xorl(out, Immediate(63));
2917 __ jmp(&done);
2918
2919 // All zero case.
2920 __ Bind(&all_zeroes);
2921 __ movl(out, Immediate(64));
2922
2923 __ Bind(&done);
2924 }
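// The BSR-based sequences rely on the identity CLZ(x) == 31 - BSR(x) == BSR(x) ^ 31 for
// non-zero 32-bit x, and on 63 - BSR(low) == BSR(low) ^ 63 when only the low word is
// non-zero; the all-zero inputs are patched up explicitly because BSR leaves its
// destination undefined in that case.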
2925
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2926 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2927 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2928 }
2929
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2930 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2931 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2932 }
2933
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2934 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2935 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2936 }
2937
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2938 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2939 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2940 }
2941
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2942 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2943 LocationSummary* locations =
2944 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2945 if (is_long) {
2946 locations->SetInAt(0, Location::RequiresRegister());
2947 } else {
2948 locations->SetInAt(0, Location::Any());
2949 }
2950 locations->SetOut(Location::RequiresRegister());
2951 }
2952
GenTrailingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2953 static void GenTrailingZeros(X86Assembler* assembler,
2954 CodeGeneratorX86* codegen,
2955 HInvoke* invoke, bool is_long) {
2956 LocationSummary* locations = invoke->GetLocations();
2957 Location src = locations->InAt(0);
2958 Register out = locations->Out().AsRegister<Register>();
2959
2960 if (invoke->InputAt(0)->IsConstant()) {
2961 // Evaluate this at compile time.
2962 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2963 if (value == 0) {
2964 value = is_long ? 64 : 32;
2965 } else {
2966 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2967 }
2968 codegen->Load32BitValue(out, value);
2969 return;
2970 }
2971
2972 // Handle the non-constant cases.
2973 if (!is_long) {
2974 if (src.IsRegister()) {
2975 __ bsfl(out, src.AsRegister<Register>());
2976 } else {
2977 DCHECK(src.IsStackSlot());
2978 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2979 }
2980
2981 // BSF sets ZF if the input was zero, and the output is undefined.
2982 NearLabel done;
2983 __ j(kNotEqual, &done);
2984
2985 // Fix the zero case with the expected result.
2986 __ movl(out, Immediate(32));
2987
2988 __ Bind(&done);
2989 return;
2990 }
2991
2992 // The 64-bit case needs to worry about both parts of the register pair.
2993 DCHECK(src.IsRegisterPair());
2994 Register src_lo = src.AsRegisterPairLow<Register>();
2995 Register src_hi = src.AsRegisterPairHigh<Register>();
2996 NearLabel done, all_zeroes;
2997
2998 // If the low word is zero, then ZF will be set. If not, we have the answer.
2999 __ bsfl(out, src_lo);
3000 __ j(kNotEqual, &done);
3001
3002 // Low word was zero. We have to compute the high word count and add 32.
3003 __ bsfl(out, src_hi);
3004 __ j(kEqual, &all_zeroes);
3005
3006 // We had a valid result. Add 32 to account for the low word being zero.
3007 __ addl(out, Immediate(32));
3008 __ jmp(&done);
3009
3010 // All zero case.
3011 __ Bind(&all_zeroes);
3012 __ movl(out, Immediate(64));
3013
3014 __ Bind(&done);
3015 }
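// Unlike the leading-zeros case, no correction of the BSF result is needed: for a non-zero
// input BSF already yields the number of trailing zeros, so only the zero inputs (yielding
// 32 or 64) are patched up explicitly.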
3016
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3017 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3018 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
3019 }
3020
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3021 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3022 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3023 }
3024
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3025 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3026 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
3027 }
3028
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3029 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3030 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3031 }
3032
IsSameInput(HInstruction * instruction,size_t input0,size_t input1)3033 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
3034 return instruction->InputAt(input0) == instruction->InputAt(input1);
3035 }
3036
VisitSystemArrayCopy(HInvoke * invoke)3037 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
3038 // The only read barrier implementation supporting the
3039 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3040 if (codegen_->EmitNonBakerReadBarrier()) {
3041 return;
3042 }
3043
3044 constexpr int32_t kLengthThreshold = -1; // No cut-off - handle large arrays in intrinsic code.
3045 constexpr size_t kInitialNumTemps = 0u; // We shall allocate temps explicitly.
3046 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
3047 invoke, kLengthThreshold, kInitialNumTemps);
3048 if (locations != nullptr) {
3049 // Add temporaries. We will use REP MOVSL, so we need fixed registers.
3050 DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
3051 locations->AddTemp(Location::RegisterLocation(ESI));
3052 locations->AddTemp(Location::RegisterLocation(EDI));
3053 locations->AddTemp(Location::RegisterLocation(ECX)); // Byte reg also used for write barrier.
3054
3055 static constexpr size_t kSrc = 0;
3056 static constexpr size_t kSrcPos = 1;
3057 static constexpr size_t kDest = 2;
3058 static constexpr size_t kDestPos = 3;
3059 static constexpr size_t kLength = 4;
3060
3061 if (!locations->InAt(kLength).IsConstant()) {
3062 // We may not have enough registers for all inputs and temps, so explicitly put the
3063 // non-constant length in the same register as one of the temps.
3064 locations->SetInAt(kLength, Location::RegisterLocation(ECX));
3065 }
3066
3067 if (codegen_->EmitBakerReadBarrier()) {
3068 // We need an additional temp in the slow path for holding the reference.
3069 if (locations->InAt(kSrcPos).IsConstant() ||
3070 locations->InAt(kDestPos).IsConstant() ||
3071 IsSameInput(invoke, kSrc, kDest) ||
3072 IsSameInput(invoke, kSrcPos, kDestPos)) {
3073 // We can allocate another temp register.
3074 locations->AddTemp(Location::RequiresRegister());
3075 } else {
3076 // Use the same fixed register for the non-const `src_pos` and the additional temp.
3077 // The `src_pos` is no longer needed when we reach the slow path.
3078 locations->SetInAt(kSrcPos, Location::RegisterLocation(EDX));
3079 locations->AddTemp(Location::RegisterLocation(EDX));
3080 }
3081 }
3082 }
3083 }
3084
VisitSystemArrayCopy(HInvoke * invoke)3085 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
3086 // The only read barrier implementation supporting the
3087 // SystemArrayCopy intrinsic is the Baker-style read barriers.
3088 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
3089
3090 X86Assembler* assembler = GetAssembler();
3091 LocationSummary* locations = invoke->GetLocations();
3092
3093 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3094 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3095 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3096 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3097 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3098
3099 Register src = locations->InAt(0).AsRegister<Register>();
3100 Location src_pos = locations->InAt(1);
3101 Register dest = locations->InAt(2).AsRegister<Register>();
3102 Location dest_pos = locations->InAt(3);
3103 Location length = locations->InAt(4);
3104 Location temp1_loc = locations->GetTemp(0);
3105 Register temp1 = temp1_loc.AsRegister<Register>();
3106 Location temp2_loc = locations->GetTemp(1);
3107 Register temp2 = temp2_loc.AsRegister<Register>();
3108
3109 SlowPathCode* intrinsic_slow_path =
3110 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3111 codegen_->AddSlowPath(intrinsic_slow_path);
3112
3113 NearLabel conditions_on_positions_validated;
3114 SystemArrayCopyOptimizations optimizations(invoke);
3115
3116 // If source and destination are the same array, we go to the slow path when dest_pos > src_pos,
3117 // as the copy code below only runs forwards. This check is not needed if the source and destination positions are the same.
3118 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
3119 if (src_pos.IsConstant()) {
3120 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
3121 if (dest_pos.IsConstant()) {
3122 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3123 if (optimizations.GetDestinationIsSource()) {
3124 // Checked when building locations.
3125 DCHECK_GE(src_pos_constant, dest_pos_constant);
3126 } else if (src_pos_constant < dest_pos_constant) {
3127 __ cmpl(src, dest);
3128 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3129 }
3130 } else {
3131 if (!optimizations.GetDestinationIsSource()) {
3132 __ cmpl(src, dest);
3133 __ j(kNotEqual, &conditions_on_positions_validated);
3134 }
3135 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3136 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3137 }
3138 } else {
3139 if (!optimizations.GetDestinationIsSource()) {
3140 __ cmpl(src, dest);
3141 __ j(kNotEqual, &conditions_on_positions_validated);
3142 }
3143 Register src_pos_reg = src_pos.AsRegister<Register>();
3144 EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
3145 }
3146 }
3147
3148 __ Bind(&conditions_on_positions_validated);
3149
3150 if (!optimizations.GetSourceIsNotNull()) {
3151 // Bail out if the source is null.
3152 __ testl(src, src);
3153 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3154 }
3155
3156 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3157 // Bail out if the destination is null.
3158 __ testl(dest, dest);
3159 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3160 }
3161
3162 // If the length is negative, bail out.
3163 // We have already checked in the LocationsBuilder for the constant case.
3164 if (!length.IsConstant() &&
3165 !optimizations.GetCountIsSourceLength() &&
3166 !optimizations.GetCountIsDestinationLength()) {
3167 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3168 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3169 }
3170
3171 // Validity checks: source.
3172 CheckSystemArrayCopyPosition(assembler,
3173 src,
3174 src_pos,
3175 length,
3176 intrinsic_slow_path,
3177 temp1,
3178 optimizations.GetCountIsSourceLength(),
3179 /*position_sign_checked=*/ false);
3180
3181 // Validity checks: dest.
3182 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
3183 CheckSystemArrayCopyPosition(assembler,
3184 dest,
3185 dest_pos,
3186 length,
3187 intrinsic_slow_path,
3188 temp1,
3189 optimizations.GetCountIsDestinationLength(),
3190 dest_position_sign_checked);
3191
3192 auto check_non_primitive_array_class = [&](Register klass, Register temp) {
3193 // No read barrier is needed for reading a chain of constant references for comparing
3194 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3195 // /* HeapReference<Class> */ temp = klass->component_type_
3196 __ movl(temp, Address(klass, component_offset));
3197 __ MaybeUnpoisonHeapReference(temp);
3198 // Check that the component type is not null.
3199 __ testl(temp, temp);
3200 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3201 // Check that the component type is not a primitive.
3202 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
3203 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3204 };
3205
3206 if (!optimizations.GetDoesNotNeedTypeCheck()) {
3207 // Check whether all elements of the source array are assignable to the component
3208 // type of the destination array. We do two checks: the classes are the same,
3209 // or the destination is Object[]. If none of these checks succeed, we go to the
3210 // slow path.
3211
3212 if (codegen_->EmitBakerReadBarrier()) {
3213 // /* HeapReference<Class> */ temp1 = dest->klass_
3214 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3215 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3216 // Register `temp1` is not trashed by the read barrier emitted
3217 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3218 // method produces a call to a ReadBarrierMarkRegX entry point,
3219 // which saves all potentially live registers, including
3220 // temporaries such as `temp1`.
3221 // /* HeapReference<Class> */ temp2 = src->klass_
3222 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3223 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3224 } else {
3225 // /* HeapReference<Class> */ temp1 = dest->klass_
3226 __ movl(temp1, Address(dest, class_offset));
3227 __ MaybeUnpoisonHeapReference(temp1);
3228 // /* HeapReference<Class> */ temp2 = src->klass_
3229 __ movl(temp2, Address(src, class_offset));
3230 __ MaybeUnpoisonHeapReference(temp2);
3231 }
3232
3233 __ cmpl(temp1, temp2);
3234 if (optimizations.GetDestinationIsTypedObjectArray()) {
3235 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3236 NearLabel do_copy;
3237 // For class match, we can skip the source type check regardless of the optimization flag.
3238 __ j(kEqual, &do_copy);
3239 // No read barrier is needed for reading a chain of constant references
3240 // for comparing with null, see `ReadBarrierOption`.
3241 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3242 __ movl(temp1, Address(temp1, component_offset));
3243 __ MaybeUnpoisonHeapReference(temp1);
3244 // No need to unpoison the following heap reference load, as
3245 // we're comparing against null.
3246 __ cmpl(Address(temp1, super_offset), Immediate(0));
3247 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3248 // Bail out if the source is not a non-primitive array.
3249 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3250 check_non_primitive_array_class(temp2, temp2);
3251 }
3252 __ Bind(&do_copy);
3253 } else {
3254 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
3255 // For class match, we can skip the array type check completely if at least one of source
3256 // and destination is known to be a non-primitive array; otherwise one check is enough.
3257 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3258 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
3259 !optimizations.GetSourceIsNonPrimitiveArray()) {
3260 check_non_primitive_array_class(temp2, temp2);
3261 }
3262 }
3263 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3264 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3265 // Bail out if the source is not a non-primitive array.
3266 // No read barrier is needed for reading a chain of constant references for comparing
3267 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
3268 // /* HeapReference<Class> */ temp1 = src->klass_
3269 __ movl(temp1, Address(src, class_offset));
3270 __ MaybeUnpoisonHeapReference(temp1);
3271 check_non_primitive_array_class(temp1, temp1);
3272 }
3273
3274 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3275 // Zero constant length: no need to emit the copy loop at all.
3276 } else {
3277 const DataType::Type type = DataType::Type::kReference;
3278 const size_t data_size = DataType::Size(type);
3279 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
3280
3281 // Don't enter copy loop if `length == 0`.
3282 NearLabel skip_copy_and_write_barrier;
3283 if (!length.IsConstant()) {
3284 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3285 __ j(kEqual, &skip_copy_and_write_barrier);
3286 }
3287
3288 // Compute the base source address in `temp1`.
3289 GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
3290 // Compute the base destination address in `temp2`.
3291 GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
3292
3293 SlowPathCode* read_barrier_slow_path = nullptr;
3294 if (codegen_->EmitBakerReadBarrier()) {
3295 // SystemArrayCopy implementation for Baker read barriers (see
3296 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3297 //
3298 // if (src_ptr != end_ptr) {
3299 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3300 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3301 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3302 // if (is_gray) {
3303 // // Slow-path copy.
3304 // for (size_t i = 0; i != length; ++i) {
3305 // dest_array[dest_pos + i] =
3306 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3307 // }
3308 // } else {
3309 // // Fast-path copy.
3310 // do {
3311 // *dest_ptr++ = *src_ptr++;
3312 // } while (src_ptr != end_ptr)
3313 // }
3314 // }
3315
3316 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3317 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3318 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3319 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3320 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3321 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3322
3323 // if (rb_state == ReadBarrier::GrayState())
3324 // goto slow_path;
3325 // At this point, just do the "if" and make sure that flags are preserved until the branch.
3326 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3327
3328 // Load fence to prevent load-load reordering.
3329 // Note that this is a no-op, thanks to the x86 memory model.
3330 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3331
3332 // Slow path used to copy array when `src` is gray.
3333 read_barrier_slow_path =
3334 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3335 codegen_->AddSlowPath(read_barrier_slow_path);
3336
3337 // We have done the "if" of the gray bit check above, now branch based on the flags.
3338 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3339 }
3340
3341 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
3342 if (length.IsConstant()) {
3343 __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
3344 } else {
3345 DCHECK_EQ(temp3, length.AsRegister<Register>());
3346 }
3347
3348 // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
3349 DCHECK_EQ(temp1, ESI);
3350 DCHECK_EQ(temp2, EDI);
3351 DCHECK_EQ(temp3, ECX);
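  // REP MOVSL copies ECX 32-bit words from [ESI] to [EDI], advancing ESI and EDI and
  // decrementing ECX down to zero (the direction flag is assumed clear, as the ABI requires).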
3352 __ rep_movsl();
3353
3354 if (read_barrier_slow_path != nullptr) {
3355 DCHECK(codegen_->EmitBakerReadBarrier());
3356 __ Bind(read_barrier_slow_path->GetExitLabel());
3357 }
3358
3359 // We only need one card marking on the destination array.
3360 codegen_->MarkGCCard(temp1, temp3, dest);
3361
3362 __ Bind(&skip_copy_and_write_barrier);
3363 }
3364
3365 __ Bind(intrinsic_slow_path->GetExitLabel());
3366 }
3367
RequestBaseMethodAddressInRegister(HInvoke * invoke)3368 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3369 LocationSummary* locations = invoke->GetLocations();
3370 if (locations != nullptr) {
3371 HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3372 // Note: The base method address is not present yet when this is called from the
3373 // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3374 if (invoke_static_or_direct->HasSpecialInput()) {
3375 DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3376 ->IsX86ComputeBaseMethodAddress());
3377 locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3378 Location::RequiresRegister());
3379 }
3380 }
3381 }
3382
3383 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3384 void IntrinsicLocationsBuilderX86::Visit##name##ValueOf(HInvoke* invoke) { \
3385 InvokeRuntimeCallingConvention calling_convention; \
3386 IntrinsicVisitor::ComputeValueOfLocations( \
3387 invoke, \
3388 codegen_, \
3389 low, \
3390 (high) - (low) + 1, \
3391 Location::RegisterLocation(EAX), \
3392 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
3393 RequestBaseMethodAddressInRegister(invoke); \
3394 } \
3395 void IntrinsicCodeGeneratorX86::Visit##name##ValueOf(HInvoke* invoke) { \
3396 IntrinsicVisitor::ValueOfInfo info = \
3397 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3398 codegen_->GetCompilerOptions(), \
3399 WellKnownClasses::java_lang_##name##_value, \
3400 low, \
3401 (high) - (low) + 1, \
3402 start_index); \
3403 HandleValueOf(invoke, info, type); \
3404 }
BOXED_TYPES(VISIT_INTRINSIC)3405 BOXED_TYPES(VISIT_INTRINSIC)
3406 #undef VISIT_INTRINSIC
3407
3408 void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke,
3409 const IntrinsicVisitor::ValueOfInfo& info,
3410 DataType::Type type) {
3411 DCHECK(invoke->IsInvokeStaticOrDirect());
3412 LocationSummary* locations = invoke->GetLocations();
3413 X86Assembler* assembler = GetAssembler();
3414
3415 Register out = locations->Out().AsRegister<Register>();
3416 auto allocate_instance = [&]() {
3417 DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3418 codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3419 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3420 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3421 };
3422 if (invoke->InputAt(0)->IsIntConstant()) {
3423 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3424 if (static_cast<uint32_t>(value - info.low) < info.length) {
3425 // Just embed the object in the code.
3426 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3427 codegen_->LoadBootImageAddress(
3428 out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3429 } else {
3430 DCHECK(locations->CanCall());
3431 // Allocate and initialize a new boxed value object (j.l.Integer, j.l.Long, etc.).
3432 // TODO: If we JIT, we could allocate the object now, and store it in the
3433 // JIT object table.
3434 allocate_instance();
3435 codegen_->MoveToMemory(type,
3436 Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()),
3437 out,
3438 /* dst_index= */ Register::kNoRegister,
3439 /* dst_scale= */ TIMES_1,
3440 /* dst_disp= */ info.value_offset);
3441 }
3442 } else {
3443 DCHECK(locations->CanCall());
3444 Register in = locations->InAt(0).AsRegister<Register>();
3445 // Check bounds of our cache.
3446 __ leal(out, Address(in, -info.low));
3447 __ cmpl(out, Immediate(info.length));
3448 NearLabel allocate, done;
3449 __ j(kAboveEqual, &allocate);
3450 // If the value is within the bounds, load the object directly from the array.
3451 constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3452 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3453 "Check heap reference size.");
3454 if (codegen_->GetCompilerOptions().IsBootImage()) {
3455 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3456 size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3457 HX86ComputeBaseMethodAddress* method_address =
3458 invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3459 DCHECK(method_address != nullptr);
3460 Register method_address_reg =
3461 invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3462 __ movl(out,
3463 Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3464 codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3465 } else {
3466 // Note: We're about to clobber the index in `out`, so we need to use `in` and
3467 // adjust the offset accordingly.
3468 uint32_t mid_array_boot_image_offset =
3469 info.array_data_boot_image_reference - info.low * kElementSize;
3470 codegen_->LoadBootImageAddress(
3471 out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3472 DCHECK_NE(out, in);
3473 __ movl(out, Address(out, in, TIMES_4, 0));
3474 }
3475 __ MaybeUnpoisonHeapReference(out);
3476 __ jmp(&done);
3477 __ Bind(&allocate);
3478 // Otherwise allocate and initialize a new object.
3479 allocate_instance();
3480 codegen_->MoveToMemory(type,
3481 Location::RegisterLocation(in),
3482 out,
3483 /* dst_index= */ Register::kNoRegister,
3484 /* dst_scale= */ TIMES_1,
3485 /* dst_disp= */ info.value_offset);
3486 __ Bind(&done);
3487 }
3488 }
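// In Java terms, the code generated above is roughly equivalent to the following sketch,
// where `cache`, `low` and `length` stand for the boot image array and the bounds taken
// from `ValueOfInfo` (hypothetical names, for illustration only):
//
//   Integer valueOf(int value) {
//     int index = value - low;
//     if (Integer.compareUnsigned(index, length) < 0) {  // single unsigned bounds check
//       return cache[index];
//     }
//     Integer boxed = allocateInitializedInteger();      // runtime entrypoint call
//     boxed.value = value;                               // direct store into the value field
//     return boxed;
//   }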
3489
VisitReferenceGetReferent(HInvoke * invoke)3490 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3491 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3492 RequestBaseMethodAddressInRegister(invoke);
3493 }
3494
VisitReferenceGetReferent(HInvoke * invoke)3495 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3496 X86Assembler* assembler = GetAssembler();
3497 LocationSummary* locations = invoke->GetLocations();
3498
3499 Location obj = locations->InAt(0);
3500 Location out = locations->Out();
3501
3502 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3503 codegen_->AddSlowPath(slow_path);
3504
3505 if (codegen_->EmitReadBarrier()) {
3506 // Check self->GetWeakRefAccessEnabled().
3507 ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3508 __ fs()->cmpl(Address::Absolute(offset),
3509 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3510 __ j(kNotEqual, slow_path->GetEntryLabel());
3511 }
3512
3513 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3514 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3515 invoke->AsInvokeStaticOrDirect());
3516
3517 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3518 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3519 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3520 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3521 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3522 __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3523 Immediate(0));
3524 __ j(kNotEqual, slow_path->GetEntryLabel());
3525
3526 // Load the value from the field.
3527 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3528 if (codegen_->EmitBakerReadBarrier()) {
3529 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3530 out,
3531 obj.AsRegister<Register>(),
3532 referent_offset,
3533 /*needs_null_check=*/ true);
3534 // Note that the fence is a no-op, thanks to the x86 memory model.
3535 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3536 } else {
3537 __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3538 codegen_->MaybeRecordImplicitNullCheck(invoke);
3539 // Note that the fence is a no-op, thanks to the x86 memory model.
3540 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3541 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3542 }
3543 __ Bind(slow_path->GetExitLabel());
3544 }
3545
VisitReferenceRefersTo(HInvoke * invoke)3546 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3547 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3548 }
3549
VisitReferenceRefersTo(HInvoke * invoke)3550 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3551 X86Assembler* assembler = GetAssembler();
3552 LocationSummary* locations = invoke->GetLocations();
3553
3554 Register obj = locations->InAt(0).AsRegister<Register>();
3555 Register other = locations->InAt(1).AsRegister<Register>();
3556 Register out = locations->Out().AsRegister<Register>();
3557
3558 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3559 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3560
3561 __ movl(out, Address(obj, referent_offset));
3562 codegen_->MaybeRecordImplicitNullCheck(invoke);
3563 __ MaybeUnpoisonHeapReference(out);
3564 // Note that the fence is a no-op, thanks to the x86 memory model.
3565 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3566
3567 NearLabel end, return_true, return_false;
3568 __ cmpl(out, other);
3569
3570 if (codegen_->EmitReadBarrier()) {
3571 DCHECK(kUseBakerReadBarrier);
3572
3573 __ j(kEqual, &return_true);
3574
3575 // Check if the loaded reference is null.
3576 __ testl(out, out);
3577 __ j(kZero, &return_false);
3578
3579 // For correct memory visibility, we need a barrier before loading the lock word
3580 // but we already have the barrier emitted for volatile load above which is sufficient.
3581
3582 // Load the lockword and check if it is a forwarding address.
3583 static_assert(LockWord::kStateShift == 30u);
3584 static_assert(LockWord::kStateForwardingAddress == 3u);
3585 __ movl(out, Address(out, monitor_offset));
3586 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3587 __ j(kBelow, &return_false);
3588
3589 // Extract the forwarding address and compare with `other`.
3590 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
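    // The left shift also discards the two lock word state bits (they shift out of the
    // 32-bit register), leaving only the decoded to-space address to compare below.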
3591 __ cmpl(out, other);
3592 }
3593
3594 __ j(kNotEqual, &return_false);
3595
3596 // Return true and exit the function.
3597 __ Bind(&return_true);
3598 __ movl(out, Immediate(1));
3599 __ jmp(&end);
3600
3601 // Return false and exit the function.
3602 __ Bind(&return_false);
3603 __ xorl(out, out);
3604 __ Bind(&end);
3605 }
3606
VisitThreadInterrupted(HInvoke * invoke)3607 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3608 LocationSummary* locations =
3609 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3610 locations->SetOut(Location::RequiresRegister());
3611 }
3612
VisitThreadInterrupted(HInvoke * invoke)3613 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3614 X86Assembler* assembler = GetAssembler();
3615 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3616 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3617 NearLabel done;
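  // Thread.interrupted() semantics: read the per-thread flag and, only if it was set,
  // clear it and emit a fence before returning the previously observed value.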
3618 __ fs()->movl(out, address);
3619 __ testl(out, out);
3620 __ j(kEqual, &done);
3621 __ fs()->movl(address, Immediate(0));
3622 codegen_->MemoryFence();
3623 __ Bind(&done);
3624 }
3625
VisitReachabilityFence(HInvoke * invoke)3626 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3627 LocationSummary* locations =
3628 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3629 locations->SetInAt(0, Location::Any());
3630 }
3631
VisitReachabilityFence(HInvoke * invoke)3632 void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3633
VisitIntegerDivideUnsigned(HInvoke * invoke)3634 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3635 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3636 LocationSummary::kCallOnSlowPath,
3637 kIntrinsified);
3638 locations->SetInAt(0, Location::RegisterLocation(EAX));
3639 locations->SetInAt(1, Location::RequiresRegister());
3640 locations->SetOut(Location::SameAsFirstInput());
3641 // Intel uses edx:eax as the dividend.
3642 locations->AddTemp(Location::RegisterLocation(EDX));
3643 }
3644
VisitIntegerDivideUnsigned(HInvoke * invoke)3645 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3646 X86Assembler* assembler = GetAssembler();
3647 LocationSummary* locations = invoke->GetLocations();
3648 Location out = locations->Out();
3649 Location first = locations->InAt(0);
3650 Location second = locations->InAt(1);
3651 Register edx = locations->GetTemp(0).AsRegister<Register>();
3652 Register second_reg = second.AsRegister<Register>();
3653
3654 DCHECK_EQ(EAX, first.AsRegister<Register>());
3655 DCHECK_EQ(EAX, out.AsRegister<Register>());
3656 DCHECK_EQ(EDX, edx);
3657
3658 // Check if the divisor is zero; if so, bail out to the managed implementation to handle it.
3659 __ testl(second_reg, second_reg);
3660 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3661 codegen_->AddSlowPath(slow_path);
3662 __ j(kEqual, slow_path->GetEntryLabel());
3663
3664 __ xorl(edx, edx);
3665 __ divl(second_reg);
3666
3667 __ Bind(slow_path->GetExitLabel());
3668 }
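// Note: the 32-bit DIV instruction divides the 64-bit value EDX:EAX by its operand and
// leaves the quotient in EAX (remainder in EDX), which is why EDX is zeroed above to form
// the zero-extended dividend.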
3669
HasVarHandleIntrinsicImplementation(HInvoke * invoke)3670 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3671 VarHandleOptimizations optimizations(invoke);
3672 if (optimizations.GetDoNotIntrinsify()) {
3673 return false;
3674 }
3675
3676 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3677 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
3678 if (expected_coordinates_count > 1u) {
3679 // Only static and instance fields VarHandle are supported now.
3680 // TODO: add support for arrays and views.
3681 return false;
3682 }
3683
3684 return true;
3685 }
3686
GenerateVarHandleAccessModeCheck(Register varhandle_object,mirror::VarHandle::AccessMode access_mode,SlowPathCode * slow_path,X86Assembler * assembler)3687 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3688 mirror::VarHandle::AccessMode access_mode,
3689 SlowPathCode* slow_path,
3690 X86Assembler* assembler) {
3691 const uint32_t access_modes_bitmask_offset =
3692 mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3693 const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3694
3695 // If the access mode is not supported, bail out to the runtime implementation to handle it.
3696 __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3697 __ j(kZero, slow_path->GetEntryLabel());
3698 }
3699
GenerateVarHandleStaticFieldCheck(Register varhandle_object,SlowPathCode * slow_path,X86Assembler * assembler)3700 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3701 SlowPathCode* slow_path,
3702 X86Assembler* assembler) {
3703 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3704
3705 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3706 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3707 __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3708 __ j(kNotEqual, slow_path->GetEntryLabel());
3709 }
3710
GenerateSubTypeObjectCheck(Register object,Register temp,Address type_address,SlowPathCode * slow_path,X86Assembler * assembler,bool object_can_be_null=true)3711 static void GenerateSubTypeObjectCheck(Register object,
3712 Register temp,
3713 Address type_address,
3714 SlowPathCode* slow_path,
3715 X86Assembler* assembler,
3716 bool object_can_be_null = true) {
3717 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3718 const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3719 NearLabel check_type_compatibility, type_matched;
3720
3721 // If the object is null, there is no need to check the type.
3722 if (object_can_be_null) {
3723 __ testl(object, object);
3724 __ j(kZero, &type_matched);
3725 }
3726
3727 // Do not unpoison for in-memory comparison.
3728 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3729 __ movl(temp, Address(object, class_offset));
3730 __ Bind(&check_type_compatibility);
3731 __ cmpl(temp, type_address);
3732 __ j(kEqual, &type_matched);
3733 // Load the super class.
3734 __ MaybeUnpoisonHeapReference(temp);
3735 __ movl(temp, Address(temp, super_class_offset));
3736 // If the super class is null, we reached the root of the hierarchy without a match.
3737 // We let the slow path handle uncovered cases (e.g. interfaces).
3738 __ testl(temp, temp);
3739 __ j(kEqual, slow_path->GetEntryLabel());
3740 __ jmp(&check_type_compatibility);
3741 __ Bind(&type_matched);
3742 }
3743
3744 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3745 Register temp,
3746 SlowPathCode* slow_path,
3747 X86Assembler* assembler) {
3748 VarHandleOptimizations optimizations(invoke);
3749 LocationSummary* locations = invoke->GetLocations();
3750 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3751 Register object = locations->InAt(1).AsRegister<Register>();
3752
3753 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3754 const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3755
3756 // Check that the VarHandle references an instance field by checking that
3757 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3758 // type compatibility check with the source object's type, which will fail for null.
3759 __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3760 __ j(kNotEqual, slow_path->GetEntryLabel());
3761
3762 // Check if the object is null
3763 if (!optimizations.GetSkipObjectNullCheck()) {
3764 __ testl(object, object);
3765 __ j(kZero, slow_path->GetEntryLabel());
3766 }
3767
3768 // Check the object's class against coordinateType0.
3769 GenerateSubTypeObjectCheck(object,
3770 temp,
3771 Address(varhandle_object, coordtype0_offset),
3772 slow_path,
3773 assembler,
3774 /* object_can_be_null= */ false);
3775 }
3776
3777 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3778 Register temp,
3779 DataType::Type type,
3780 SlowPathCode* slow_path,
3781 X86Assembler* assembler) {
3782 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3783 const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3784 const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3785
3786 // No read barrier is needed here: the reference is loaded only to read a constant primitive
3787 // field (the primitive type) through it.
3788 __ movl(temp, Address(varhandle_object, var_type_offset));
3789 __ MaybeUnpoisonHeapReference(temp);
3790 __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3791 __ j(kNotEqual, slow_path->GetEntryLabel());
3792 }
3793
3794 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3795 Register temp,
3796 SlowPathCode* slow_path,
3797 X86Assembler* assembler) {
3798 LocationSummary* locations = invoke->GetLocations();
3799 Register vh_object = locations->InAt(0).AsRegister<Register>();
3800 mirror::VarHandle::AccessMode access_mode =
3801 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3802
3803 GenerateVarHandleAccessModeCheck(vh_object,
3804 access_mode,
3805 slow_path,
3806 assembler);
3807
3808 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3809 switch (expected_coordinates_count) {
3810 case 0u:
3811 GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3812 break;
3813 case 1u: {
3814 GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
3815 break;
3816 }
3817 default:
3818 LOG(FATAL) << "Unexpected coordinates count: " << expected_coordinates_count;
3819 UNREACHABLE();
3820 }
3821
3822 // Check the return type and varType parameters.
3823 mirror::VarHandle::AccessModeTemplate access_mode_template =
3824 mirror::VarHandle::GetAccessModeTemplate(access_mode);
3825 DataType::Type type = invoke->GetType();
3826
3827 switch (access_mode_template) {
3828 case mirror::VarHandle::AccessModeTemplate::kGet:
3829 // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3830 // are also checked later by a HCheckCast node as an additional check.
3831 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3832 break;
3833 case mirror::VarHandle::AccessModeTemplate::kSet:
3834 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3835 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3836 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3837
3838 // Check the varType.primitiveType against the type of the value we're trying to set.
3839 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3840 if (value_type == DataType::Type::kReference) {
3841 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3842
3843 // If the value type is a reference, check it against the varType.
3844 GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3845 temp,
3846 Address(vh_object, var_type_offset),
3847 slow_path,
3848 assembler);
3849 }
3850 break;
3851 }
3852 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3853 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3854 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3855 uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3856 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3857 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3858
3859 // Check the varType.primitiveType against the type of the expected value.
3860 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3861 if (value_type == DataType::Type::kReference) {
3862 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3863
3864 // If the value type is a reference, check both the expected and the new value against
3865 // the varType.
3866 GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3867 temp,
3868 Address(vh_object, var_type_offset),
3869 slow_path,
3870 assembler);
3871 GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3872 temp,
3873 Address(vh_object, var_type_offset),
3874 slow_path,
3875 assembler);
3876 }
3877 break;
3878 }
3879 }
3880 }
3881
3882 // This method loads the field address referred to by a field VarHandle (base + offset).
3883 // The return value is the register containing the object's reference (for an instance field)
3884 // or the declaring class (for a static field). The declaring class is stored in the `temp`
3885 // register. The field's offset is loaded into the `offset` register.
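// Callers then form the field address as Address(returned_register, offset, TIMES_1, 0), i.e.
// object + field offset for instance fields or declaring class + field offset for static fields.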
3886 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3887 CodeGeneratorX86* codegen,
3888 Register temp,
3889 /*out*/ Register offset) {
3890 X86Assembler* assembler = codegen->GetAssembler();
3891 LocationSummary* locations = invoke->GetLocations();
3892 const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3893 const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3894 const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3895 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3896
3897 // Load the ArtField* and the offset.
3898 __ movl(temp, Address(varhandle_object, artfield_offset));
3899 __ movl(offset, Address(temp, offset_offset));
3900 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3901 if (expected_coordinates_count == 0) {
3902 // For static fields, load the declaring class
3903 InstructionCodeGeneratorX86* instr_codegen =
3904 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3905 instr_codegen->GenerateGcRootFieldLoad(invoke,
3906 Location::RegisterLocation(temp),
3907 Address(temp, declaring_class_offset),
3908 /* fixup_label= */ nullptr,
3909 codegen->GetCompilerReadBarrierOption());
3910 return temp;
3911 }
3912
3913 // For instance fields, return the register containing the object.
3914 DCHECK_EQ(expected_coordinates_count, 1u);
3915
3916 return locations->InAt(1).AsRegister<Register>();
3917 }
3918
3919 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
3920 // The only read barrier implementation supporting the
3921 // VarHandleGet intrinsic is the Baker-style read barrier.
3922 if (codegen->EmitNonBakerReadBarrier()) {
3923 return;
3924 }
3925
3926 if (!HasVarHandleIntrinsicImplementation(invoke)) {
3927 return;
3928 }
3929
3930 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3931 LocationSummary* locations = new (allocator) LocationSummary(
3932 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3933 locations->SetInAt(0, Location::RequiresRegister());
3934 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3935 if (expected_coordinates_count == 1u) {
3936 // For instance fields, this is the source object.
3937 locations->SetInAt(1, Location::RequiresRegister());
3938 }
3939 locations->AddTemp(Location::RequiresRegister());
3940
3941 DataType::Type type = invoke->GetType();
3942 switch (DataType::Kind(type)) {
3943 case DataType::Type::kInt64:
3944 locations->AddTemp(Location::RequiresRegister());
3945 if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3946 // We need an XmmRegister for Int64 to ensure an atomic load
3947 locations->AddTemp(Location::RequiresFpuRegister());
3948 }
3949 FALLTHROUGH_INTENDED;
3950 case DataType::Type::kInt32:
3951 case DataType::Type::kReference:
3952 locations->SetOut(Location::RequiresRegister());
3953 break;
3954 default:
3955 DCHECK(DataType::IsFloatingPointType(type));
3956 locations->AddTemp(Location::RequiresRegister());
3957 locations->SetOut(Location::RequiresFpuRegister());
3958 }
3959 }
3960
3961 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3962 // The only read barrier implementation supporting the
3963 // VarHandleGet intrinsic is the Baker-style read barrier.
3964 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
3965
3966 X86Assembler* assembler = codegen->GetAssembler();
3967 LocationSummary* locations = invoke->GetLocations();
3968 DataType::Type type = invoke->GetType();
3969 DCHECK_NE(type, DataType::Type::kVoid);
3970 Register temp = locations->GetTemp(0).AsRegister<Register>();
3971 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3972 codegen->AddSlowPath(slow_path);
3973
3974 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3975
3976 Location out = locations->Out();
3977 // Use 'out' as a temporary register if it's a core register
3978 Register offset =
3979 out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3980
3981 // Get the field referred by the VarHandle. The returned register contains the object reference
3982 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3983 // declaring class will be placed in 'temp' register.
3984 Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3985 Address field_addr(ref, offset, TIMES_1, 0);
3986
3987 // Load the value from the field
3988 if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
3989 codegen->GenerateReferenceLoadWithBakerReadBarrier(
3990 invoke, out, ref, field_addr, /* needs_null_check= */ false);
3991 } else if (type == DataType::Type::kInt64 &&
3992 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3993 XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3994 codegen->LoadFromMemoryNoBarrier(
3995 type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
3996 } else {
3997 codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3998 }
3999
4000 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
4001 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
4002 // Load fence to prevent load-load reordering.
4003 // Note that this is a no-op, thanks to the x86 memory model.
4004 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4005 }
4006
4007 __ Bind(slow_path->GetExitLabel());
4008 }
4009
4010 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
4011 CreateVarHandleGetLocations(invoke, codegen_);
4012 }
4013
4014 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
4015 GenerateVarHandleGet(invoke, codegen_);
4016 }
4017
4018 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4019 CreateVarHandleGetLocations(invoke, codegen_);
4020 }
4021
4022 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
4023 GenerateVarHandleGet(invoke, codegen_);
4024 }
4025
4026 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4027 CreateVarHandleGetLocations(invoke, codegen_);
4028 }
4029
4030 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
4031 GenerateVarHandleGet(invoke, codegen_);
4032 }
4033
4034 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4035 CreateVarHandleGetLocations(invoke, codegen_);
4036 }
4037
4038 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
4039 GenerateVarHandleGet(invoke, codegen_);
4040 }
4041
4042 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4043 // The only read barrier implementation supporting the
4044 // VarHandleSet intrinsics is the Baker-style read barrier.
4045 if (codegen->EmitNonBakerReadBarrier()) {
4046 return;
4047 }
4048
4049 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4050 return;
4051 }
4052
4053 // The last argument should be the value we intend to set.
4054 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4055 HInstruction* value = invoke->InputAt(value_index);
4056 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4057 bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
4058 if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
4059 // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
4060 // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
4061 // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
4062 // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4063 return;
4064 }
4065
4066 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4067 LocationSummary* locations = new (allocator) LocationSummary(
4068 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4069 locations->SetInAt(0, Location::RequiresRegister());
4070 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4071 if (expected_coordinates_count == 1u) {
4072 // For instance fields, this is the source object
4073 locations->SetInAt(1, Location::RequiresRegister());
4074 }
4075
4076 switch (value_type) {
4077 case DataType::Type::kBool:
4078 case DataType::Type::kInt8:
4079 case DataType::Type::kUint8:
4080 // Ensure the value is in a byte register
4081 locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
4082 break;
4083 case DataType::Type::kInt16:
4084 case DataType::Type::kUint16:
4085 case DataType::Type::kInt32:
4086 locations->SetInAt(value_index, Location::RegisterOrConstant(value));
4087 break;
4088 case DataType::Type::kInt64:
4089 // We only handle constant non-atomic int64 values.
4090 DCHECK(value->IsConstant());
4091 locations->SetInAt(value_index, Location::ConstantLocation(value));
4092 break;
4093 case DataType::Type::kReference:
4094 locations->SetInAt(value_index, Location::RequiresRegister());
4095 break;
4096 default:
4097 DCHECK(DataType::IsFloatingPointType(value_type));
4098 if (needs_atomicity && value_type == DataType::Type::kFloat64) {
4099 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4100 } else {
4101 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
4102 }
4103 }
4104
4105 locations->AddTemp(Location::RequiresRegister());
4106 // This temporary register is also used for the card in MarkGCCard, so it must be a byte register.
4107 locations->AddTemp(Location::RegisterLocation(EAX));
4108 if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
4109 // For static reference fields, we need another temporary for the declaring class. We set it
4110 // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
4111 locations->AddTemp(Location::RequiresRegister());
4112 }
4113 }
4114
4115 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4116 // The only read barrier implementation supporting the
4117 // VarHandleSet intrinsics is the Baker-style read barrier.
4118 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4119
4120 X86Assembler* assembler = codegen->GetAssembler();
4121 LocationSummary* locations = invoke->GetLocations();
4122 // The value we want to set is the last argument
4123 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4124 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4125 Register temp = locations->GetTemp(0).AsRegister<Register>();
4126 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4127 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4128 codegen->AddSlowPath(slow_path);
4129
4130 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4131
4132 // For static reference fields, we need another temporary for the declaring class. But since
4133 // for instance fields the object is in a separate register, it is safe to use the first
4134 // temporary register for GenerateVarHandleFieldReference.
4135 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4136 if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4137 temp = locations->GetTemp(2).AsRegister<Register>();
4138 }
4139
4140 Register offset = temp2;
4141 // Get the field referred by the VarHandle. The returned register contains the object reference
4142 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4143 // declaring class will be placed in 'temp' register.
4144 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4145
4146 bool is_volatile = false;
4147 switch (invoke->GetIntrinsic()) {
4148 case Intrinsics::kVarHandleSet:
4149 case Intrinsics::kVarHandleSetOpaque:
4150 // The only constraint for setOpaque is to ensure bitwise atomicity (atomically set 64 bit
4151 // values), but we don't handle Int64 values because we would need to place them in a register
4152 // pair. If the slow path is taken, the ParallelMove might fail to move the register pair
4153 // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4154 break;
4155 case Intrinsics::kVarHandleSetRelease:
4156 // setRelease needs to ensure atomicity too. See the above comment.
4157 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4158 break;
4159 case Intrinsics::kVarHandleSetVolatile:
4160 is_volatile = true;
4161 break;
4162 default:
4163 LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4164 }
4165
4166 InstructionCodeGeneratorX86* instr_codegen =
4167 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4168 // Store the value to the field
4169 instr_codegen->HandleFieldSet(
4170 invoke,
4171 value_index,
4172 value_type,
4173 Address(reference, offset, TIMES_1, 0),
4174 reference,
4175 is_volatile,
4176 /* value_can_be_null */ true,
4177 // Value can be null, and this write barrier is not being relied on for other sets.
4178 value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4179 WriteBarrierKind::kDontEmit);
4180
4181 __ Bind(slow_path->GetExitLabel());
4182 }
4183
4184 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4185 CreateVarHandleSetLocations(invoke, codegen_);
4186 }
4187
4188 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4189 GenerateVarHandleSet(invoke, codegen_);
4190 }
4191
4192 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4193 CreateVarHandleSetLocations(invoke, codegen_);
4194 }
4195
4196 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4197 GenerateVarHandleSet(invoke, codegen_);
4198 }
4199
4200 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4201 CreateVarHandleSetLocations(invoke, codegen_);
4202 }
4203
4204 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4205 GenerateVarHandleSet(invoke, codegen_);
4206 }
4207
4208 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4209 CreateVarHandleSetLocations(invoke, codegen_);
4210 }
4211
4212 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4213 GenerateVarHandleSet(invoke, codegen_);
4214 }
4215
4216 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4217 // The only read barrier implementation supporting the
4218 // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
4219 if (codegen->EmitNonBakerReadBarrier()) {
4220 return;
4221 }
4222
4223 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4224 return;
4225 }
4226
4227 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4228 uint32_t value_index = number_of_arguments - 1;
4229 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4230
4231 if (DataType::Is64BitType(value_type)) {
4232 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4233 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4234 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4235 // <EAX, EBX> to <EBX, ECX>).
4236 return;
4237 }
4238
4239 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4240 LocationSummary* locations = new (allocator) LocationSummary(
4241 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4242 locations->AddTemp(Location::RequiresRegister());
4243 locations->AddTemp(Location::RequiresRegister());
4244 // We use this temporary for the card, so we need a byte register
4245 locations->AddTemp(Location::RegisterLocation(EBX));
4246 locations->SetInAt(0, Location::RequiresRegister());
4247 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4248 // For instance fields, this is the source object
4249 locations->SetInAt(1, Location::RequiresRegister());
4250 } else {
4251 // For static fields, we need another temp because one will be busy with the declaring class.
4252 locations->AddTemp(Location::RequiresRegister());
4253 }
4254 if (value_type == DataType::Type::kFloat32) {
4255 locations->AddTemp(Location::RegisterLocation(EAX));
4256 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4257 locations->SetOut(Location::RequiresFpuRegister());
4258 } else {
4259 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4260 locations->SetOut(Location::RegisterLocation(EAX));
4261 }
4262 }
4263
4264 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4265 // The only read barrier implementation supporting the
4266 // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
4267 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4268
4269 X86Assembler* assembler = codegen->GetAssembler();
4270 LocationSummary* locations = invoke->GetLocations();
4271 // The value we want to set is the last argument
4272 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4273 Location value = locations->InAt(value_index);
4274 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4275 Register temp = locations->GetTemp(1).AsRegister<Register>();
4276 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4277 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4278 codegen->AddSlowPath(slow_path);
4279
4280 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4281
4282 Register offset = locations->GetTemp(0).AsRegister<Register>();
4283 // Get the field referred by the VarHandle. The returned register contains the object reference
4284 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4285 // declaring class will be placed in 'temp' register.
4286 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4287 Address field_addr(reference, offset, TIMES_1, 0);
4288
4289 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4290 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4291 }
4292
4293 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4294 // For static fields, we need another temporary for the declaring class. But since for instance
4295 // fields the object is in a separate register, it is safe to use the first temporary register.
4296 temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4297 // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
4298 switch (value_type) {
4299 case DataType::Type::kBool:
4300 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4301 __ movzxb(locations->Out().AsRegister<Register>(),
4302 locations->Out().AsRegister<ByteRegister>());
4303 break;
4304 case DataType::Type::kInt8:
4305 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4306 __ movsxb(locations->Out().AsRegister<Register>(),
4307 locations->Out().AsRegister<ByteRegister>());
4308 break;
4309 case DataType::Type::kUint16:
4310 __ xchgw(value.AsRegister<Register>(), field_addr);
4311 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4312 break;
4313 case DataType::Type::kInt16:
4314 __ xchgw(value.AsRegister<Register>(), field_addr);
4315 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4316 break;
4317 case DataType::Type::kInt32:
4318 __ xchgl(value.AsRegister<Register>(), field_addr);
4319 break;
4320 case DataType::Type::kFloat32:
4321 codegen->Move32(Location::RegisterLocation(EAX), value);
4322 __ xchgl(EAX, field_addr);
4323 __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4324 break;
4325 case DataType::Type::kReference: {
4326 if (codegen->EmitBakerReadBarrier()) {
4327 // Need to make sure the reference stored in the field is a to-space
4328 // one before attempting the CAS or the CAS could fail incorrectly.
4329 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4330 invoke,
4331 // Unused, used only as a "temporary" within the read barrier.
4332 Location::RegisterLocation(temp),
4333 reference,
4334 field_addr,
4335 /* needs_null_check= */ false,
4336 /* always_update_field= */ true,
4337 &temp2);
4338 }
4339 codegen->MarkGCCard(temp, temp2, reference);
4340 if (kPoisonHeapReferences) {
4341 __ movl(temp, value.AsRegister<Register>());
4342 __ PoisonHeapReference(temp);
4343 __ xchgl(temp, field_addr);
4344 __ UnpoisonHeapReference(temp);
4345 __ movl(locations->Out().AsRegister<Register>(), temp);
4346 } else {
4347 __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4348 }
4349 break;
4350 }
4351 default:
4352 LOG(FATAL) << "Unexpected type: " << value_type;
4353 UNREACHABLE();
4354 }
4355
4356 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4357 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4358 }
4359
4360 __ Bind(slow_path->GetExitLabel());
4361 }
4362
4363 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4364 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4365 }
4366
4367 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4368 GenerateVarHandleGetAndSet(invoke, codegen_);
4369 }
4370
4371 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4372 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4373 }
4374
4375 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4376 GenerateVarHandleGetAndSet(invoke, codegen_);
4377 }
4378
4379 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4380 CreateVarHandleGetAndSetLocations(invoke, codegen_);
4381 }
4382
4383 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4384 GenerateVarHandleGetAndSet(invoke, codegen_);
4385 }
4386
4387 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4388 CodeGeneratorX86* codegen) {
4389 // The only read barrier implementation supporting the
4390 // VarHandle compare-and-set and compare-and-exchange intrinsics is the Baker-style read barrier.
4391 if (codegen->EmitNonBakerReadBarrier()) {
4392 return;
4393 }
4394
4395 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4396 return;
4397 }
4398
4399 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4400 uint32_t expected_value_index = number_of_arguments - 2;
4401 uint32_t new_value_index = number_of_arguments - 1;
4402 DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4403 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4404
4405 if (DataType::Is64BitType(value_type)) {
4406 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4407 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4408 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4409 // <EAX, EBX> to <EBX, ECX>).
4410 return;
4411 }
4412
4413 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4414 LocationSummary* locations = new (allocator) LocationSummary(
4415 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4416 locations->AddTemp(Location::RequiresRegister());
4417 locations->AddTemp(Location::RequiresRegister());
4418 // We use this temporary for the card, so we need a byte register
4419 locations->AddTemp(Location::RegisterLocation(EBX));
4420 locations->SetInAt(0, Location::RequiresRegister());
4421 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4422 // For instance fields, this is the source object
4423 locations->SetInAt(1, Location::RequiresRegister());
4424 } else {
4425 // For static fields, we need another temp because one will be busy with the declaring class.
4426 locations->AddTemp(Location::RequiresRegister());
4427 }
4428 if (DataType::IsFloatingPointType(value_type)) {
4429 // We need EAX for placing the expected value
4430 locations->AddTemp(Location::RegisterLocation(EAX));
4431 locations->SetInAt(new_value_index,
4432 Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4433 locations->SetInAt(expected_value_index,
4434 Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4435 } else {
4436 // Ensure it's in a byte register
4437 locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4438 locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4439 }
4440
4441 mirror::VarHandle::AccessModeTemplate access_mode_template =
4442 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4443
4444 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4445 value_type == DataType::Type::kFloat32) {
4446 locations->SetOut(Location::RequiresFpuRegister());
4447 } else {
4448 locations->SetOut(Location::RegisterLocation(EAX));
4449 }
4450 }
4451
4452 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4453 // The only read barrier implementation supporting the
4454 // VarHandle compare-and-set and compare-and-exchange intrinsics is the Baker-style read barrier.
4455 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4456
4457 X86Assembler* assembler = codegen->GetAssembler();
4458 LocationSummary* locations = invoke->GetLocations();
4459 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4460 uint32_t expected_value_index = number_of_arguments - 2;
4461 uint32_t new_value_index = number_of_arguments - 1;
4462 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4463 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4464 Location expected_value = locations->InAt(expected_value_index);
4465 Location new_value = locations->InAt(new_value_index);
4466 Register offset = locations->GetTemp(0).AsRegister<Register>();
4467 Register temp = locations->GetTemp(1).AsRegister<Register>();
4468 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4469 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4470 codegen->AddSlowPath(slow_path);
4471
4472 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4473
4474 // Get the field referred by the VarHandle. The returned register contains the object reference
4475 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4476 // declaring class will be placed in 'temp' register.
4477 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4478
4479 uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4480 // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4481 // first temporary contains the declaring class so we need another temporary. In case of an
4482 // instance field, the object comes in a separate register so it's safe to use the first temp.
4483 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4484 DCHECK_NE(temp, reference);
4485
4486 // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4487 // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4488 // barriers at this time.
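// Note that `cmpxchg` implicitly compares against EAX and leaves the old value in EAX, which is
// why the expected value (and the non-floating-point output) is pinned to EAX in the locations above.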
4489
4490 mirror::VarHandle::AccessModeTemplate access_mode_template =
4491 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4492 bool is_cmpxchg =
4493 access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
4494
4495 if (type == DataType::Type::kReference) {
4496 GenReferenceCAS(
4497 invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4498 } else {
4499 Location out = locations->Out();
4500 GenPrimitiveCAS(
4501 type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4502 }
4503
4504 __ Bind(slow_path->GetExitLabel());
4505 }
4506
4507 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4508 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4509 }
4510
4511 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4512 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4513 }
4514
4515 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4516 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4517 }
4518
4519 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4520 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4521 }
4522
4523 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4524 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4525 }
4526
4527 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4528 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4529 }
4530
4531 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4532 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4533 }
4534
4535 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4536 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4537 }
4538
4539 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4540 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4541 }
4542
4543 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4544 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4545 }
4546
4547 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4548 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4549 }
4550
4551 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4552 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4553 }
4554
4555 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4556 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4557 }
4558
4559 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4560 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4561 }
4562
4563 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4564 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4565 }
4566
4567 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4568 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4569 }
4570
4571 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4572 // The only read barrier implementation supporting the
4573 // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4574 if (codegen->EmitNonBakerReadBarrier()) {
4575 return;
4576 }
4577
4578 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4579 return;
4580 }
4581
4582 // The last argument should be the value we intend to set.
4583 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4584 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4585 if (DataType::Is64BitType(value_type)) {
4586 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4587 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4588 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4589 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4590 return;
4591 }
4592
4593 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4594 LocationSummary* locations = new (allocator) LocationSummary(
4595 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4596 locations->AddTemp(Location::RequiresRegister());
4597 locations->AddTemp(Location::RequiresRegister());
4598 locations->SetInAt(0, Location::RequiresRegister());
4599 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4600 if (expected_coordinates_count == 1u) {
4601 // For instance fields, this is the source object
4602 locations->SetInAt(1, Location::RequiresRegister());
4603 } else {
4604 // For static fields, we need another temp because one will be busy with the declaring class.
4605 locations->AddTemp(Location::RequiresRegister());
4606 }
4607
4608 if (DataType::IsFloatingPointType(value_type)) {
4609 locations->AddTemp(Location::RequiresFpuRegister());
4610 locations->AddTemp(Location::RegisterLocation(EAX));
4611 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4612 locations->SetOut(Location::RequiresFpuRegister());
4613 } else {
4614 // xadd updates the register argument with the old value. ByteRegister required for xaddb.
4615 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4616 locations->SetOut(Location::RegisterLocation(EAX));
4617 }
4618 }
4619
4620 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4621 // The only read barrier implementation supporting the
4622 // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4623 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4624
4625 X86Assembler* assembler = codegen->GetAssembler();
4626 LocationSummary* locations = invoke->GetLocations();
4627 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4628 uint32_t value_index = number_of_arguments - 1;
4629 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4630 DCHECK_EQ(type, invoke->GetType());
4631 Location value_loc = locations->InAt(value_index);
4632 Register temp = locations->GetTemp(0).AsRegister<Register>();
4633 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4634 codegen->AddSlowPath(slow_path);
4635
4636 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4637
4638 Register offset = locations->GetTemp(1).AsRegister<Register>();
4639 // Get the field referred by the VarHandle. The returned register contains the object reference
4640 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4641 // declaring class will be placed in 'temp' register.
4642 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4643
4644 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4645 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4646 DCHECK_NE(temp, reference);
4647 Address field_addr(reference, offset, TIMES_1, 0);
4648
4649 switch (type) {
4650 case DataType::Type::kInt8:
4651 __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4652 __ movsxb(locations->Out().AsRegister<Register>(),
4653 locations->Out().AsRegister<ByteRegister>());
4654 break;
4655 case DataType::Type::kInt16:
4656 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4657 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4658 break;
4659 case DataType::Type::kUint16:
4660 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4661 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4662 break;
4663 case DataType::Type::kInt32:
4664 __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4665 break;
4666 case DataType::Type::kFloat32: {
4667 Location temp_float =
4668 (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4669 DCHECK(temp_float.IsFpuRegister());
4670 Location eax = Location::RegisterLocation(EAX);
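// There is no floating-point `xadd`, so emulate getAndAdd with a load + addss + lock cmpxchg
// retry loop. EAX holds the old value's bits, as expected by cmpxchg.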
4671 NearLabel try_again;
4672 __ Bind(&try_again);
4673 __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4674 __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4675 __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4676 value_loc.AsFpuRegister<XmmRegister>());
4677 GenPrimitiveLockedCmpxchg(type,
4678 codegen,
4679 /* expected_value= */ eax,
4680 /* new_value= */ temp_float,
4681 reference,
4682 offset,
4683 temp);
4684 __ j(kNotZero, &try_again);
4685
4686 // The old value is present in EAX.
4687 codegen->Move32(locations->Out(), eax);
4688 break;
4689 }
4690 default:
4691 LOG(FATAL) << "Unexpected type: " << type;
4692 UNREACHABLE();
4693 }
4694
4695 __ Bind(slow_path->GetExitLabel());
4696 }
4697
4698 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4699 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4700 }
4701
4702 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4703 GenerateVarHandleGetAndAdd(invoke, codegen_);
4704 }
4705
4706 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4707 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4708 }
4709
4710 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4711 GenerateVarHandleGetAndAdd(invoke, codegen_);
4712 }
4713
4714 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4715 CreateVarHandleGetAndAddLocations(invoke, codegen_);
4716 }
4717
4718 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4719 GenerateVarHandleGetAndAdd(invoke, codegen_);
4720 }
4721
4722 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
4723 // The only read barrier implementation supporting the
4724 // VarHandle get-and-bitwise-op intrinsics is the Baker-style read barrier.
4725 if (codegen->EmitNonBakerReadBarrier()) {
4726 return;
4727 }
4728
4729 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4730 return;
4731 }
4732
4733 // The last argument should be the value we intend to set.
4734 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4735 if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
4736 // We avoid the case of an Int64 value because we would need to place it in a register pair.
4737 // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4738 // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4739 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4740 return;
4741 }
4742
4743 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4744 LocationSummary* locations = new (allocator) LocationSummary(
4745 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4746 // We need a byte register temp to store the result of the bitwise operation
4747 locations->AddTemp(Location::RegisterLocation(EBX));
4748 locations->AddTemp(Location::RequiresRegister());
4749 locations->SetInAt(0, Location::RequiresRegister());
4750 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4751 if (expected_coordinates_count == 1u) {
4752 // For instance fields, this is the source object
4753 locations->SetInAt(1, Location::RequiresRegister());
4754 } else {
4755 // For static fields, we need another temp because one will be busy with the declaring class.
4756 locations->AddTemp(Location::RequiresRegister());
4757 }
4758
4759 locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
4760 locations->SetOut(Location::RegisterLocation(EAX));
4761 }
4762
4763 static void GenerateBitwiseOp(HInvoke* invoke,
4764 CodeGeneratorX86* codegen,
4765 Register left,
4766 Register right) {
4767 X86Assembler* assembler = codegen->GetAssembler();
4768
4769 switch (invoke->GetIntrinsic()) {
4770 case Intrinsics::kVarHandleGetAndBitwiseOr:
4771 case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
4772 case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
4773 __ orl(left, right);
4774 break;
4775 case Intrinsics::kVarHandleGetAndBitwiseXor:
4776 case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
4777 case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
4778 __ xorl(left, right);
4779 break;
4780 case Intrinsics::kVarHandleGetAndBitwiseAnd:
4781 case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
4782 case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
4783 __ andl(left, right);
4784 break;
4785 default:
4786 LOG(FATAL) << "Unexpected intrinsic: " << invoke->GetIntrinsic();
4787 UNREACHABLE();
4788 }
4789 }
4790
4791 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
4792 // The only read barrier implementation supporting the
4793 // VarHandle get-and-bitwise-op intrinsics is the Baker-style read barrier.
4794 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4795
4796 X86Assembler* assembler = codegen->GetAssembler();
4797 LocationSummary* locations = invoke->GetLocations();
4798 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4799 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4800 DCHECK_EQ(type, invoke->GetType());
4801 Register temp = locations->GetTemp(0).AsRegister<Register>();
4802 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4803 codegen->AddSlowPath(slow_path);
4804
4805 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4806
4807 Register offset = locations->GetTemp(1).AsRegister<Register>();
4808 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4809 // For static fields, we need another temporary because the first one contains the declaring class.
4810 Register reference =
4811 (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4812 // Get the field referred by the VarHandle. The returned register contains the object reference
4813 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4814 // declaring class will be placed in 'reference' register.
4815 reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
4816 DCHECK_NE(temp, reference);
4817 Address field_addr(reference, offset, TIMES_1, 0);
4818
4819 Register out = locations->Out().AsRegister<Register>();
4820 DCHECK_EQ(out, EAX);
4821
4822 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
4823 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
4824 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
4825 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4826 }
4827
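// Emulate the atomic read-modify-write with a load + bitwise op + lock cmpxchg retry loop.
// EAX holds the expected old value for cmpxchg and also receives the result.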
4828 NearLabel try_again;
4829 __ Bind(&try_again);
4830 // Place the expected value in EAX for cmpxchg
4831 codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
4832 codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
4833 GenerateBitwiseOp(invoke, codegen, temp, out);
4834 GenPrimitiveLockedCmpxchg(type,
4835 codegen,
4836 /* expected_value= */ locations->Out(),
4837 /* new_value= */ locations->GetTemp(0),
4838 reference,
4839 offset);
4840 // If the cmpxchg failed, another thread changed the value so try again.
4841 __ j(kNotZero, &try_again);
4842
4843 // The old value is present in EAX.
4844
4845 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
4846 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
4847 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
4848 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4849 }
4850
4851 __ Bind(slow_path->GetExitLabel());
4852 }
4853
4854 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4855 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4856 }
4857
4858 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4859 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4860 }
4861
4862 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4863 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4864 }
4865
4866 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4867 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4868 }
4869
4870 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4871 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4872 }
4873
4874 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4875 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4876 }
4877
4878 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4879 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4880 }
4881
4882 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4883 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4884 }
4885
4886 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4887 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4888 }
4889
4890 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4891 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4892 }
4893
4894 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4895 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4896 }
4897
VisitVarHandleGetAndBitwiseXorRelease(HInvoke * invoke)4898 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4899 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4900 }
4901
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4902 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4903 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4904 }
4905
VisitVarHandleGetAndBitwiseAnd(HInvoke * invoke)4906 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4907 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4908 }
4909
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4910 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4911 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4912 }
4913
VisitVarHandleGetAndBitwiseAndAcquire(HInvoke * invoke)4914 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4915 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4916 }
4917
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4918 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4919 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
4920 }
4921
VisitVarHandleGetAndBitwiseAndRelease(HInvoke * invoke)4922 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4923 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4924 }
4925
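// Emits a fused multiply-add for Math.fma(a, b, c): a * b + c is computed with a single
// rounding using the AVX vfmadd213 encoding, with the first input register doubling as
// the destination.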
static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  X86Assembler* assembler = codegen->GetAssembler();
  XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
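  // vfmadd213 computes dst = dst * src2 + src3, i.e. left = left * right + accumulator.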
  if (invoke->GetType() == DataType::Type::kFloat32) {
    __ vfmadd213ss(left, right, accumulator);
  } else {
    DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
    __ vfmadd213sd(left, right, accumulator);
  }
}

void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

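// The FMA intrinsics are only marked as intrinsified when the target reports AVX2
// support; otherwise no locations are created and the call is compiled as a regular
// invoke of Math.fma.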
void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

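// Intrinsics not implemented for x86 get no intrinsic locations and are compiled as
// regular invokes.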
#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

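// Intrinsics that never reach the code generator (they are replaced earlier in the
// compilation pipeline) are marked unreachable.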
UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art
